# Table Operations

Table creation, schema management, data operations, and metadata management. BigQuery tables store structured data with enforced schemas and support various configurations including partitioning, clustering, and expiration policies.

## Capabilities

### Table Definition

Create and configure BigQuery tables with schemas, partitioning, and metadata.

```python { .api }
class Table:
    def __init__(
        self,
        table_ref: Union[TableReference, str],
        schema: List[SchemaField] = None
    ):
        """
        Initialize a Table.

        Args:
            table_ref: Table reference string or TableReference object.
            schema: Table schema as list of SchemaField objects.
        """

    @property
    def reference(self) -> TableReference:
        """Table reference object."""

    @property
    def table_id(self) -> str:
        """Table ID."""

    @property
    def dataset_id(self) -> str:
        """Dataset ID containing the table."""

    @property
    def project(self) -> str:
        """Project ID containing the table."""

    @property
    def schema(self) -> List[SchemaField]:
        """Table schema."""

    @schema.setter
    def schema(self, value: List[SchemaField]): ...

    @property
    def friendly_name(self) -> str:
        """Human-readable table name."""

    @friendly_name.setter
    def friendly_name(self, value: str): ...

    @property
    def description(self) -> str:
        """Table description."""

    @description.setter
    def description(self, value: str): ...

    @property
    def num_bytes(self) -> int:
        """Size of table in bytes."""

    @property
    def num_rows(self) -> int:
        """Number of rows in table."""

    @property
    def created(self) -> datetime.datetime:
        """Table creation timestamp."""

    @property
    def modified(self) -> datetime.datetime:
        """Table last modification timestamp."""

    @property
    def expires(self) -> datetime.datetime:
        """Table expiration timestamp."""

    @expires.setter
    def expires(self, value: datetime.datetime): ...

    @property
    def labels(self) -> Dict[str, str]:
        """Labels for organizing tables."""

    @labels.setter
    def labels(self, value: Dict[str, str]): ...

    @property
    def time_partitioning(self) -> TimePartitioning:
        """Time partitioning configuration."""

    @time_partitioning.setter
    def time_partitioning(self, value: TimePartitioning): ...

    @property
    def range_partitioning(self) -> RangePartitioning:
        """Range partitioning configuration."""

    @range_partitioning.setter
    def range_partitioning(self, value: RangePartitioning): ...

    @property
    def clustering_fields(self) -> List[str]:
        """Fields used for clustering."""

    @clustering_fields.setter
    def clustering_fields(self, value: List[str]): ...

    @property
    def require_partition_filter(self) -> bool:
        """Require partition filter in queries."""

    @require_partition_filter.setter
    def require_partition_filter(self, value: bool): ...
```

### Table Reference

Reference tables by project, dataset, and table ID for API operations.

```python { .api }
class TableReference:
    def __init__(self, dataset_ref: DatasetReference, table_id: str):
        """
        Reference to a BigQuery table.

        Args:
            dataset_ref: Dataset reference containing the table.
            table_id: Table ID.
        """

    @property
    def dataset_id(self) -> str:
        """Dataset ID."""

    @property
    def project(self) -> str:
        """Project ID."""

    @property
    def table_id(self) -> str:
        """Table ID."""

    @property
    def path(self) -> str:
        """Full table path (project:dataset.table)."""

    @classmethod
    def from_string(
        cls,
        table_id: str,
        default_project: str = None
    ) -> TableReference:
        """
        Create TableReference from string.

        Args:
            table_id: Table ID with optional project and dataset.
            default_project: Default project if not specified.

        Returns:
            TableReference: Table reference object.
        """
```

### Partitioning Configuration

Configure time-based and range-based table partitioning for query performance and cost optimization.

```python { .api }
class TimePartitioning:
    def __init__(
        self,
        type_: str = None,
        field: str = None,
        expiration_ms: int = None,
        require_partition_filter: bool = None,
    ):
        """
        Time-based partitioning configuration.

        Args:
            type_: Partitioning type (DAY, HOUR, MONTH, YEAR).
            field: Field to partition by (None for ingestion time).
            expiration_ms: Partition expiration in milliseconds.
            require_partition_filter: Require partition filter in queries.
        """

    @property
    def type_(self) -> str:
        """Partitioning type."""

    @property
    def field(self) -> str:
        """Partitioning field."""

    @property
    def expiration_ms(self) -> int:
        """Partition expiration in milliseconds."""

    @property
    def require_partition_filter(self) -> bool:
        """Require partition filter in queries."""

class RangePartitioning:
    def __init__(self, field: str = None, range_: PartitionRange = None):
        """
        Range-based partitioning configuration.

        Args:
            field: Field to partition by.
            range_: Range configuration for partitioning.
        """

    @property
    def field(self) -> str:
        """Partitioning field."""

    @property
    def range_(self) -> PartitionRange:
        """Range configuration."""

class PartitionRange:
    def __init__(self, start: int = None, end: int = None, interval: int = None):
        """
        Range definition for range partitioning.

        Args:
            start: Start of range (inclusive).
            end: End of range (exclusive).
            interval: Interval between partitions.
        """

    @property
    def start(self) -> int:
        """Range start (inclusive)."""

    @property
    def end(self) -> int:
        """Range end (exclusive)."""

    @property
    def interval(self) -> int:
        """Partition interval."""
```

### Table Types and Views

Support for different table types including views, materialized views, and external tables.

```python { .api }
# View configuration
@property
def view_query(self) -> str:
    """SQL query defining the view."""

@view_query.setter
def view_query(self, value: str): ...

@property
def view_use_legacy_sql(self) -> bool:
    """Use legacy SQL for view query."""

@view_use_legacy_sql.setter
def view_use_legacy_sql(self, value: bool): ...

# Materialized view configuration
@property
def mview_query(self) -> str:
    """SQL query defining the materialized view."""

@mview_query.setter
def mview_query(self, value: str): ...

@property
def mview_enable_refresh(self) -> bool:
    """Enable automatic refresh of materialized view."""

@mview_enable_refresh.setter
def mview_enable_refresh(self, value: bool): ...

@property
def mview_refresh_interval_ms(self) -> int:
    """Refresh interval in milliseconds."""

@mview_refresh_interval_ms.setter
def mview_refresh_interval_ms(self, value: int): ...

# External table configuration
@property
def external_data_configuration(self) -> ExternalConfig:
    """External data source configuration."""

@external_data_configuration.setter
def external_data_configuration(self, value: ExternalConfig): ...
```

## Usage Examples

### Create Standard Table

```python
from google.cloud import bigquery

client = bigquery.Client()

# Define table schema
schema = [
    bigquery.SchemaField("transaction_id", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("user_id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("amount", "NUMERIC", mode="REQUIRED", precision=10, scale=2),
    bigquery.SchemaField("currency", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("transaction_date", "DATE", mode="REQUIRED"),
    bigquery.SchemaField("created_at", "TIMESTAMP", mode="REQUIRED"),
    bigquery.SchemaField("metadata", "JSON", mode="NULLABLE"),
]

# Create table
table_id = f"{client.project}.transactions.daily_transactions"
table = bigquery.Table(table_id, schema=schema)

# Configure table properties
table.friendly_name = "Daily Transactions"
table.description = "Daily transaction records with user and payment information"
table.labels = {
    "environment": "production",
    "data_type": "financial"
}

# Set expiration (90 days)
table.expires = datetime.datetime.now() + datetime.timedelta(days=90)

# Create the table
table = client.create_table(table, exists_ok=True)
print(f"Created table {table.table_id}")
```

### Create Partitioned Table

```python
# Create time-partitioned table
schema = [
    bigquery.SchemaField("event_id", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("user_id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("event_type", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("event_timestamp", "TIMESTAMP", mode="REQUIRED"),
    bigquery.SchemaField("properties", "JSON", mode="NULLABLE"),
]

table = bigquery.Table(f"{client.project}.analytics.events", schema=schema)

# Configure time partitioning by event_timestamp field
table.time_partitioning = bigquery.TimePartitioning(
    type_=bigquery.TimePartitioningType.DAY,
    field="event_timestamp",
    expiration_ms=30 * 24 * 60 * 60 * 1000,  # 30 days
    require_partition_filter=True
)

# Add clustering for better query performance
table.clustering_fields = ["event_type", "user_id"]

table = client.create_table(table, exists_ok=True)
print(f"Created partitioned table {table.table_id}")
```

### Create Range-Partitioned Table

```python
# Create range-partitioned table
schema = [
    bigquery.SchemaField("customer_id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("region_code", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("customer_name", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("signup_date", "DATE", mode="REQUIRED"),
]

table = bigquery.Table(f"{client.project}.customers.customers_by_region", schema=schema)

# Configure range partitioning by region_code
table.range_partitioning = bigquery.RangePartitioning(
    field="region_code",
    range_=bigquery.PartitionRange(start=0, end=1000, interval=100)
)

table = client.create_table(table, exists_ok=True)
print(f"Created range-partitioned table {table.table_id}")
```

### Create View

```python
# Create a view
view_query = """
SELECT
    t.transaction_id,
    t.user_id,
    t.amount,
    t.currency,
    t.transaction_date,
    u.user_name,
    u.email
FROM `{}.transactions.daily_transactions` t
JOIN `{}.users.user_profiles` u ON t.user_id = u.user_id
WHERE t.transaction_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY)
""".format(client.project, client.project)

view = bigquery.Table(f"{client.project}.reports.recent_transactions_with_users")
view.view_query = view_query
view.description = "Recent transactions joined with user information"

view = client.create_table(view, exists_ok=True)
print(f"Created view {view.table_id}")
```

### Create Materialized View

```python
# Create materialized view for aggregated data
mview_query = """
SELECT
    DATE(created_at) as transaction_date,
    currency,
    COUNT(*) as transaction_count,
    SUM(amount) as total_amount,
    AVG(amount) as avg_amount
FROM `{}.transactions.daily_transactions`
GROUP BY DATE(created_at), currency
""".format(client.project)

mview = bigquery.Table(f"{client.project}.reports.daily_transaction_summary")
mview.mview_query = mview_query
mview.mview_enable_refresh = True
mview.mview_refresh_interval_ms = 60 * 60 * 1000  # 1 hour

mview = client.create_table(mview, exists_ok=True)
print(f"Created materialized view {mview.table_id}")
```

### Table Schema Evolution

```python
# Get existing table
table = client.get_table(f"{client.project}.transactions.daily_transactions")
current_schema = table.schema

# Add new fields to schema
new_schema = list(current_schema)
new_schema.extend([
    bigquery.SchemaField("payment_method", "STRING", mode="NULLABLE"),
    bigquery.SchemaField("merchant_id", "INTEGER", mode="NULLABLE"),
    bigquery.SchemaField("risk_score", "FLOAT", mode="NULLABLE"),
])

# Update table schema
table.schema = new_schema
table = client.update_table(table, ["schema"])
print(f"Updated schema for {table.table_id}")

# Verify schema changes
updated_table = client.get_table(table.reference)
print("New schema fields:")
for field in updated_table.schema:
    if field.name not in [f.name for f in current_schema]:
        print(f"  {field.name}: {field.field_type} ({field.mode})")
```

### Table Data Operations

```python
# Get table metadata and statistics
table = client.get_table(f"{client.project}.transactions.daily_transactions")

print(f"Table: {table.table_id}")
print(f"Size: {table.num_bytes:,} bytes ({table.num_bytes / 1024**3:.2f} GB)")
print(f"Rows: {table.num_rows:,}")
print(f"Created: {table.created}")
print(f"Modified: {table.modified}")

if table.time_partitioning:
    print(f"Partitioned by: {table.time_partitioning.field or 'ingestion time'}")
    print(f"Partition type: {table.time_partitioning.type_}")

if table.clustering_fields:
    print(f"Clustered by: {', '.join(table.clustering_fields)}")

# Copy table
source_table = f"{client.project}.transactions.daily_transactions"
dest_table = f"{client.project}.backups.daily_transactions_backup"

job_config = bigquery.CopyJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE
)

copy_job = client.copy_table(source_table, dest_table, job_config=job_config)
copy_job.result()  # Wait for completion

print(f"Copied {source_table} to {dest_table}")
```

### Table Maintenance

```python
# List tables in dataset with metadata
dataset_id = f"{client.project}.transactions"
tables = client.list_tables(dataset_id)

print("Tables in dataset:")
for table_item in tables:
    table = client.get_table(table_item.reference)
    size_gb = table.num_bytes / (1024**3) if table.num_bytes else 0

    print(f"  {table.table_id}")
    print(f"    Rows: {table.num_rows:,}")
    print(f"    Size: {size_gb:.2f} GB")
    print(f"    Modified: {table.modified}")

    if table.expires:
        print(f"    Expires: {table.expires}")

# Clean up old tables
cutoff_date = datetime.datetime.now() - datetime.timedelta(days=30)

for table_item in tables:
    table = client.get_table(table_item.reference)
    if table.modified < cutoff_date and table.table_id.startswith("temp_"):
        print(f"Deleting old temp table: {table.table_id}")
        client.delete_table(table.reference, not_found_ok=True)

# Update table metadata
table = client.get_table(f"{client.project}.transactions.daily_transactions")
table.description = "Updated: Daily transaction records with enhanced metadata"
table.labels.update({"last_updated": "2023-12-01"})

table = client.update_table(table, ["description", "labels"])
print("Updated table metadata")
```