or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-database-api.md, data-utilities.md, error-handling.md, index.md, sqlalchemy-integration.md

docs/core-database-api.md

0

# Core Database API

1

2

A fully DB API 2.0 (PEP 249) compliant interface providing connection management, query execution, and result fetching for HiveServer2 implementations (Impala and Hive).

3

4

## Capabilities

5

6

### Connection Management

7

8

Establishes and manages connections to HiveServer2 with comprehensive authentication and transport options.

9

10

```python { .api }

11

def connect(host='localhost', port=21050, database=None, timeout=None,

12

use_ssl=False, ca_cert=None, auth_mechanism='NOSASL', user=None,

13

password=None, kerberos_service_name='impala', use_ldap=None,

14

ldap_user=None, ldap_password=None, use_kerberos=None,

15

protocol=None, krb_host=None, use_http_transport=False,

16

http_path='', auth_cookie_names=None, http_cookie_names=None,

17

retries=3, jwt=None, user_agent=None,

18

get_user_custom_headers_func=None):

19

"""

20

Get a connection to HiveServer2 (HS2).

21

22

Parameters:

23

host (str): The hostname for HS2. For Impala, this can be any of the impalads.

24

port (int): The port number for HS2. Default is 21050 for Impala.

25

database (str): The default database. If None, implementation-dependent.

26

timeout (int): Connection timeout in seconds. Default is no timeout.

27

use_ssl (bool): Enable SSL.

28

ca_cert (str): Local path to the third-party CA certificate.

29

auth_mechanism (str): Authentication mechanism ('NOSASL', 'PLAIN', 'GSSAPI', 'LDAP', 'JWT').

30

user (str): Username for LDAP or PLAIN authentication, if applicable.

31

password (str): Password for LDAP or PLAIN authentication, if applicable.

32

kerberos_service_name (str): Service principal name. Default is 'impala'.

33

use_http_transport (bool): Use HTTP transport instead of binary transport.

34

http_path (str): Path in the HTTP URL when using HTTP transport.

35

auth_cookie_names (list or str): Cookie names for cookie-based authentication.

36

http_cookie_names (list or str): Cookie names for session management.

37

retries (int): Number of connection retries. Default is 3.

38

jwt (str): JSON Web Token for JWT authentication.

39

user_agent (str): Custom user agent string.

40

get_user_custom_headers_func (callable): Function to get custom HTTP headers.

41

42

Returns:

43

HiveServer2Connection: Connection object implementing DB API 2.0

44

"""

45

```

46

47

### Connection Objects

48

49

Connection objects provide DB API 2.0 compliant database connection management.

50

51

```python { .api }

52

class HiveServer2Connection:

53

"""Main connection class implementing DB API 2.0 Connection interface."""

54

55

def close(self):

56

"""Close the connection."""

57

58

def commit(self):

59

"""Commit current transaction (no-op for Impala/Hive)."""

60

61

def rollback(self):

62

"""Rollback current transaction (no-op for Impala/Hive)."""

63

64

def cursor(self, user=None, configuration=None, convert_types=True,

65

dictify=False, fetch_error=True, close_finished_queries=True,

66

convert_strings_to_unicode=True):

67

"""

68

Return a new cursor object using the connection.

69

70

Parameters:

71

user (str): Optional user for the cursor session

72

configuration (dict): Configuration overlay for the HS2 session

73

convert_types (bool): Convert timestamps and decimals to Python types

74

dictify (bool): Return rows as dictionaries instead of tuples

75

fetch_error (bool): Whether to fetch error details on query failure

76

close_finished_queries (bool): Auto-close finished queries

77

convert_strings_to_unicode (bool): Convert strings to Unicode

78

79

Returns:

80

HiveServer2Cursor or HiveServer2DictCursor: Cursor object

81

"""

82

83

def reconnect(self):

84

"""Reconnect to the database."""

85

86

def kerberized(self):

87

"""Check if connection uses Kerberos authentication."""

88

89

def __enter__(self):

90

"""Context manager entry."""

91

92

def __exit__(self, exc_type, exc_val, exc_tb):

93

"""Context manager exit."""

94

```

95

96

### Cursor Objects

97

98

Cursor objects provide DB API 2.0 compliant query execution and result fetching.

99

100

```python { .api }

101

class HiveServer2Cursor:

102

"""Cursor implementation for executing queries."""

103

104

# Properties

105

description: list

106

"""Sequence of 7-item sequences describing each result column."""

107

108

rowcount: int

109

"""Number of rows that the last execute() produced or affected."""

110

111

arraysize: int

112

"""Read/write attribute specifying number of rows to fetch at a time."""

113

114

buffersize: int

115

"""Buffer size for fetching results."""

116

117

# Methods

118

def execute(self, query, parameters=None):

119

"""

120

Execute a database operation (query or command).

121

122

Parameters:

123

query (str): SQL query to execute

124

parameters (dict): Query parameters for substitution

125

"""

126

127

def executemany(self, query, seq_of_parameters):

128

"""

129

Execute a database operation repeatedly.

130

131

Parameters:

132

query (str): SQL query to execute

133

seq_of_parameters (sequence): Sequence of parameter dictionaries

134

"""

135

136

def fetchone(self):

137

"""

138

Fetch the next row of a query result set.

139

140

Returns:

141

tuple or None: The next row, or None when no more data is available

142

"""

143

144

def fetchmany(self, size=None):

145

"""

146

Fetch the next set of rows of a query result set.

147

148

Parameters:

149

size (int): Number of rows to fetch. Uses arraysize if None.

150

151

Returns:

152

list: List of tuples representing rows

153

"""

154

155

def fetchall(self):

156

"""

157

Fetch all (remaining) rows of a query result set.

158

159

Returns:

160

list: List of tuples representing all rows

161

"""

162

163

def close(self):

164

"""Close the cursor."""

165

166

def get_profile(self):

167

"""Get the query profile information."""

168

169

def get_summary(self):

170

"""Get the query execution summary."""

171

172

def get_log(self):

173

"""Get the query execution log."""

174

175

def ping(self):

176

"""Ping the server to check connection status."""

177

178

def __iter__(self):

179

"""Iterator interface for cursor results."""

180

181

def __enter__(self):

182

"""Context manager entry."""

183

184

def __exit__(self, exc_type, exc_val, exc_tb):

185

"""Context manager exit."""

186

```

187

188

### Dictionary Cursor

189

190

Specialized cursor that returns results as dictionaries instead of tuples.

191

192

```python { .api }

193

class HiveServer2DictCursor(HiveServer2Cursor):

194

"""

195

Dictionary cursor implementation that returns rows as dictionaries.

196

197

Inherits from HiveServer2Cursor but returns each row as a dictionary

198

with column names as keys instead of tuples.

199

"""

200

201

def fetchone(self):

202

"""

203

Fetch the next row as a dictionary.

204

205

Returns:

206

dict or None: Next row as dictionary with column names as keys,

207

or None when no more data available

208

"""

209

210

def fetchmany(self, size=None):

211

"""

212

Fetch the next set of rows as dictionaries.

213

214

Parameters:

215

size (int): Number of rows to fetch. Uses arraysize if None.

216

217

Returns:

218

list: List of dictionaries representing rows

219

"""

220

221

def fetchall(self):

222

"""

223

Fetch all (remaining) rows as dictionaries.

224

225

Returns:

226

list: List of dictionaries representing all rows

227

"""

228

```

229

230

### DB API 2.0 Type Objects

231

232

Type objects for proper data type handling according to DB API 2.0 specification.

233

234

```python { .api }

235

# Type objects for data type identification

236

STRING: _DBAPITypeObject

237

"""Type object for string data types. Matches 'STRING' type."""

238

239

BINARY: _DBAPITypeObject

240

"""Type object for binary data types. Matches 'BINARY' type."""

241

242

NUMBER: _DBAPITypeObject

243

"""

244

Type object for numeric data types.

245

Matches: 'BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE', 'DECIMAL'

246

"""

247

248

DATETIME: _DBAPITypeObject

249

"""Type object for datetime data types. Matches 'TIMESTAMP' type."""

250

251

DATE: _DBAPITypeObject

252

"""Type object for date data types. Matches 'DATE' type."""

253

254

ROWID: _DBAPITypeObject

255

"""Type object for row identifier data types. Empty values set."""

256

```

257

258

### Date and Time Functions

259

260

DB API 2.0 compliant date and time constructors.

261

262

```python { .api }

263

def Date(year, month, day):

264

"""

265

Construct a date value.

266

267

Parameters:

268

year (int): Year

269

month (int): Month (1-12)

270

day (int): Day (1-31)

271

272

Returns:

273

datetime.date: Date object

274

"""

275

276

def Time(hour, minute, second):

277

"""

278

Construct a time value.

279

280

Parameters:

281

hour (int): Hour (0-23)

282

minute (int): Minute (0-59)

283

second (int): Second (0-59)

284

285

Returns:

286

datetime.time: Time object

287

"""

288

289

def Timestamp(year, month, day, hour, minute, second):

290

"""

291

Construct a timestamp value.

292

293

Parameters:

294

year (int): Year

295

month (int): Month (1-12)

296

day (int): Day (1-31)

297

hour (int): Hour (0-23)

298

minute (int): Minute (0-59)

299

second (int): Second (0-59)

300

301

Returns:

302

datetime.datetime: Timestamp object

303

"""

304

305

def DateFromTicks(ticks):

306

"""

307

Construct a date from a Unix timestamp (seconds since the epoch).

308

309

Parameters:

310

ticks (float): Unix timestamp

311

312

Returns:

313

datetime.date: Date object

314

"""

315

316

def TimeFromTicks(ticks):

317

"""

318

Construct a time from a Unix timestamp (seconds since the epoch).

319

320

Parameters:

321

ticks (float): Unix timestamp

322

323

Returns:

324

datetime.time: Time object

325

"""

326

327

def TimestampFromTicks(ticks):

328

"""

329

Construct a timestamp from a Unix timestamp (seconds since the epoch).

330

331

Parameters:

332

ticks (float): Unix timestamp

333

334

Returns:

335

datetime.datetime: Timestamp object

336

"""

337

338

def Binary(data):

339

"""

340

Construct a binary data object.

341

342

Parameters:

343

data (bytes): Binary data

344

345

Returns:

346

memoryview: Binary data object

347

"""

348

```

349

350

### Module Constants

351

352

DB API 2.0 module-level constants indicating compliance and capabilities.

353

354

```python { .api }

355

apilevel = '2.0'

356

"""String constant stating the supported DB API level."""

357

358

threadsafety = 1

359

"""Integer constant stating the thread safety level."""

360

361

paramstyle = 'pyformat'

362

"""String constant stating the parameter style."""

363

364

AUTH_MECHANISMS = ['NOSASL', 'PLAIN', 'GSSAPI', 'LDAP', 'JWT']

365

"""List of supported authentication mechanisms."""

366

```

367

368

## Usage Examples

369

370

### Basic Connection and Query

371

372

```python

373

from impala.dbapi import connect

374

375

# Connect with basic authentication

376

conn = connect(

377

host='impala-cluster.example.com',

378

port=21050,

379

auth_mechanism='PLAIN',

380

user='username',

381

password='password'

382

)

383

384

# Create cursor and execute query

385

cursor = conn.cursor()

386

cursor.execute("SELECT COUNT(*) FROM my_table")

387

result = cursor.fetchone()

388

print(f"Row count: {result[0]}")

389

390

# Clean up

391

cursor.close()

392

conn.close()

393

```

394

395

### Context Manager Usage

396

397

```python

398

from impala.dbapi import connect

399

400

# Using connection as context manager

401

with connect(host='impala-host', port=21050) as conn:

402

with conn.cursor() as cursor:

403

cursor.execute("SELECT * FROM my_table LIMIT 5")

404

for row in cursor.fetchall():

405

print(row)

406

```

407

408

### Parameterized Queries

409

410

```python

411

from impala.dbapi import connect

412

413

conn = connect(host='impala-host', port=21050)

414

cursor = conn.cursor()

415

416

# Parameterized query using pyformat style

417

query = "SELECT * FROM users WHERE age > %(min_age)s AND city = %(city)s"

418

params = {'min_age': 25, 'city': 'San Francisco'}

419

420

cursor.execute(query, params)

421

results = cursor.fetchall()

422

423

for row in results:

424

print(row)

425

426

cursor.close()

427

conn.close()

428

```

429

430

### Kerberos Authentication

431

432

```python

433

from impala.dbapi import connect

434

435

# Connect using Kerberos authentication

436

conn = connect(

437

host='secure-impala.example.com',

438

port=21050,

439

auth_mechanism='GSSAPI',

440

kerberos_service_name='impala',

441

use_ssl=True

442

)

443

444

cursor = conn.cursor()

445

cursor.execute("SELECT version()")

446

version = cursor.fetchone()

447

print(f"Impala version: {version[0]}")

448

449

cursor.close()

450

conn.close()

451

```