# Table Operations

Table creation, schema management, data operations, and metadata management. BigQuery tables store structured data with enforced schemas and support various configurations including partitioning, clustering, and expiration policies.

## Capabilities

### Table Definition

Create and configure BigQuery tables with schemas, partitioning, and metadata.

```python { .api }
class Table:
    def __init__(
        self,
        table_ref: Union[TableReference, str],
        schema: List[SchemaField] = None
    ):
        """
        Initialize a Table.

        Args:
            table_ref: Table reference string or TableReference object.
            schema: Table schema as list of SchemaField objects.
        """

    @property
    def reference(self) -> TableReference:
        """Table reference object."""

    @property
    def table_id(self) -> str:
        """Table ID."""

    @property
    def dataset_id(self) -> str:
        """Dataset ID containing the table."""

    @property
    def project(self) -> str:
        """Project ID containing the table."""

    @property
    def schema(self) -> List[SchemaField]:
        """Table schema."""

    @schema.setter
    def schema(self, value: List[SchemaField]): ...

    @property
    def friendly_name(self) -> str:
        """Human-readable table name."""

    @friendly_name.setter
    def friendly_name(self, value: str): ...

    @property
    def description(self) -> str:
        """Table description."""

    @description.setter
    def description(self, value: str): ...

    @property
    def num_bytes(self) -> int:
        """Size of table in bytes."""

    @property
    def num_rows(self) -> int:
        """Number of rows in table."""

    @property
    def created(self) -> datetime.datetime:
        """Table creation timestamp."""

    @property
    def modified(self) -> datetime.datetime:
        """Table last modification timestamp."""

    @property
    def expires(self) -> datetime.datetime:
        """Table expiration timestamp."""

    @expires.setter
    def expires(self, value: datetime.datetime): ...

    @property
    def labels(self) -> Dict[str, str]:
        """Labels for organizing tables."""

    @labels.setter
    def labels(self, value: Dict[str, str]): ...

    @property
    def time_partitioning(self) -> TimePartitioning:
        """Time partitioning configuration."""

    @time_partitioning.setter
    def time_partitioning(self, value: TimePartitioning): ...

    @property
    def range_partitioning(self) -> RangePartitioning:
        """Range partitioning configuration."""

    @range_partitioning.setter
    def range_partitioning(self, value: RangePartitioning): ...

    @property
    def clustering_fields(self) -> List[str]:
        """Fields used for clustering."""

    @clustering_fields.setter
    def clustering_fields(self, value: List[str]): ...

    @property
    def require_partition_filter(self) -> bool:
        """Require partition filter in queries."""

    @require_partition_filter.setter
    def require_partition_filter(self, value: bool): ...
```

### Table Reference

Reference tables by project, dataset, and table ID for API operations.

```python { .api }
class TableReference:
    def __init__(self, dataset_ref: DatasetReference, table_id: str):
        """
        Reference to a BigQuery table.

        Args:
            dataset_ref: Dataset reference containing the table.
            table_id: Table ID.
        """

    @property
    def dataset_id(self) -> str:
        """Dataset ID."""

    @property
    def project(self) -> str:
        """Project ID."""

    @property
    def table_id(self) -> str:
        """Table ID."""

    @property
    def path(self) -> str:
        """Full table path (project:dataset.table)."""

    @classmethod
    def from_string(
        cls,
        table_id: str,
        default_project: str = None
    ) -> TableReference:
        """
        Create TableReference from string.

        Args:
            table_id: Table ID with optional project and dataset.
            default_project: Default project if not specified.

        Returns:
            TableReference: Table reference object.
        """
```

### Partitioning Configuration

Configure time-based and range-based table partitioning for query performance and cost optimization.

```python { .api }
class TimePartitioning:
    def __init__(
        self,
        type_: str = None,
        field: str = None,
        expiration_ms: int = None,
        require_partition_filter: bool = None,
    ):
        """
        Time-based partitioning configuration.

        Args:
            type_: Partitioning type (DAY, HOUR, MONTH, YEAR).
            field: Field to partition by (None for ingestion time).
            expiration_ms: Partition expiration in milliseconds.
            require_partition_filter: Require partition filter in queries.
        """

    @property
    def type_(self) -> str:
        """Partitioning type."""

    @property
    def field(self) -> str:
        """Partitioning field."""

    @property
    def expiration_ms(self) -> int:
        """Partition expiration in milliseconds."""

    @property
    def require_partition_filter(self) -> bool:
        """Require partition filter in queries."""

class RangePartitioning:
    def __init__(self, field: str = None, range_: PartitionRange = None):
        """
        Range-based partitioning configuration.

        Args:
            field: Field to partition by.
            range_: Range configuration for partitioning.
        """

    @property
    def field(self) -> str:
        """Partitioning field."""

    @property
    def range_(self) -> PartitionRange:
        """Range configuration."""

class PartitionRange:
    def __init__(self, start: int = None, end: int = None, interval: int = None):
        """
        Range definition for range partitioning.

        Args:
            start: Start of range (inclusive).
            end: End of range (exclusive).
            interval: Interval between partitions.
        """

    @property
    def start(self) -> int:
        """Range start (inclusive)."""

    @property
    def end(self) -> int:
        """Range end (exclusive)."""

    @property
    def interval(self) -> int:
        """Partition interval."""
```

### Table Types and Views

Support for different table types including views, materialized views, and external tables.

```python { .api }
# View configuration
@property
def view_query(self) -> str:
    """SQL query defining the view."""

@view_query.setter
def view_query(self, value: str): ...

@property
def view_use_legacy_sql(self) -> bool:
    """Use legacy SQL for view query."""

@view_use_legacy_sql.setter
def view_use_legacy_sql(self, value: bool): ...

# Materialized view configuration
@property
def mview_query(self) -> str:
    """SQL query defining the materialized view."""

@mview_query.setter
def mview_query(self, value: str): ...

@property
def mview_enable_refresh(self) -> bool:
    """Enable automatic refresh of materialized view."""

@mview_enable_refresh.setter
def mview_enable_refresh(self, value: bool): ...

@property
def mview_refresh_interval_ms(self) -> int:
    """Refresh interval in milliseconds."""

@mview_refresh_interval_ms.setter
def mview_refresh_interval_ms(self, value: int): ...

# External table configuration
@property
def external_data_configuration(self) -> ExternalConfig:
    """External data source configuration."""

@external_data_configuration.setter
def external_data_configuration(self, value: ExternalConfig): ...
```

## Usage Examples

### Create Standard Table

```python
import datetime

from google.cloud import bigquery

client = bigquery.Client()

# Define table schema
schema = [
    bigquery.SchemaField("transaction_id", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("user_id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("amount", "NUMERIC", mode="REQUIRED", precision=10, scale=2),
    bigquery.SchemaField("currency", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("transaction_date", "DATE", mode="REQUIRED"),
    bigquery.SchemaField("created_at", "TIMESTAMP", mode="REQUIRED"),
    bigquery.SchemaField("metadata", "JSON", mode="NULLABLE"),
]

# Create table
table_id = f"{client.project}.transactions.daily_transactions"
table = bigquery.Table(table_id, schema=schema)

# Configure table properties
table.friendly_name = "Daily Transactions"
table.description = "Daily transaction records with user and payment information"
table.labels = {
    "environment": "production",
    "data_type": "financial"
}

# Set expiration (90 days)
table.expires = datetime.datetime.now() + datetime.timedelta(days=90)

# Create the table
table = client.create_table(table, exists_ok=True)
print(f"Created table {table.table_id}")
```

### Create Partitioned Table

```python
# Create time-partitioned table
schema = [
    bigquery.SchemaField("event_id", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("user_id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("event_type", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("event_timestamp", "TIMESTAMP", mode="REQUIRED"),
    bigquery.SchemaField("properties", "JSON", mode="NULLABLE"),
]

table = bigquery.Table(f"{client.project}.analytics.events", schema=schema)

# Configure time partitioning by event_timestamp field
table.time_partitioning = bigquery.TimePartitioning(
    type_=bigquery.TimePartitioningType.DAY,
    field="event_timestamp",
    expiration_ms=30 * 24 * 60 * 60 * 1000,  # 30 days
    require_partition_filter=True
)

# Add clustering for better query performance
table.clustering_fields = ["event_type", "user_id"]

table = client.create_table(table, exists_ok=True)
print(f"Created partitioned table {table.table_id}")
```

### Create Range-Partitioned Table

```python
# Create range-partitioned table
schema = [
    bigquery.SchemaField("customer_id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("region_code", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("customer_name", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("signup_date", "DATE", mode="REQUIRED"),
]

table = bigquery.Table(f"{client.project}.customers.customers_by_region", schema=schema)

# Configure range partitioning by region_code
table.range_partitioning = bigquery.RangePartitioning(
    field="region_code",
    range_=bigquery.PartitionRange(start=0, end=1000, interval=100)
)

table = client.create_table(table, exists_ok=True)
print(f"Created range-partitioned table {table.table_id}")
```

### Create View

```python
# Create a view
view_query = """
SELECT
    t.transaction_id,
    t.user_id,
    t.amount,
    t.currency,
    t.transaction_date,
    u.user_name,
    u.email
FROM `{}.transactions.daily_transactions` t
JOIN `{}.users.user_profiles` u ON t.user_id = u.user_id
WHERE t.transaction_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY)
""".format(client.project, client.project)

view = bigquery.Table(f"{client.project}.reports.recent_transactions_with_users")
view.view_query = view_query
view.description = "Recent transactions joined with user information"

view = client.create_table(view, exists_ok=True)
print(f"Created view {view.table_id}")
```

### Create Materialized View

```python
# Create materialized view for aggregated data
mview_query = """
SELECT
    DATE(created_at) as transaction_date,
    currency,
    COUNT(*) as transaction_count,
    SUM(amount) as total_amount,
    AVG(amount) as avg_amount
FROM `{}.transactions.daily_transactions`
GROUP BY DATE(created_at), currency
""".format(client.project)

mview = bigquery.Table(f"{client.project}.reports.daily_transaction_summary")
mview.mview_query = mview_query
mview.mview_enable_refresh = True
mview.mview_refresh_interval_ms = 60 * 60 * 1000  # 1 hour

mview = client.create_table(mview, exists_ok=True)
print(f"Created materialized view {mview.table_id}")
```

### Table Schema Evolution

```python
# Get existing table
table = client.get_table(f"{client.project}.transactions.daily_transactions")
current_schema = table.schema

# Add new fields to schema
new_schema = list(current_schema)
new_schema.extend([
    bigquery.SchemaField("payment_method", "STRING", mode="NULLABLE"),
    bigquery.SchemaField("merchant_id", "INTEGER", mode="NULLABLE"),
    bigquery.SchemaField("risk_score", "FLOAT", mode="NULLABLE"),
])

# Update table schema
table.schema = new_schema
table = client.update_table(table, ["schema"])
print(f"Updated schema for {table.table_id}")

# Verify schema changes
updated_table = client.get_table(table.reference)
print("New schema fields:")
for field in updated_table.schema:
    if field.name not in [f.name for f in current_schema]:
        print(f"  {field.name}: {field.field_type} ({field.mode})")
```

### Table Data Operations

```python
# Get table metadata and statistics
table = client.get_table(f"{client.project}.transactions.daily_transactions")

print(f"Table: {table.table_id}")
print(f"Size: {table.num_bytes:,} bytes ({table.num_bytes / 1024**3:.2f} GB)")
print(f"Rows: {table.num_rows:,}")
print(f"Created: {table.created}")
print(f"Modified: {table.modified}")

if table.time_partitioning:
    print(f"Partitioned by: {table.time_partitioning.field or 'ingestion time'}")
    print(f"Partition type: {table.time_partitioning.type_}")

if table.clustering_fields:
    print(f"Clustered by: {', '.join(table.clustering_fields)}")

# Copy table
source_table = f"{client.project}.transactions.daily_transactions"
dest_table = f"{client.project}.backups.daily_transactions_backup"

job_config = bigquery.CopyJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE
)

copy_job = client.copy_table(source_table, dest_table, job_config=job_config)
copy_job.result()  # Wait for completion

print(f"Copied {source_table} to {dest_table}")
```

### Table Maintenance

```python
# List tables in dataset with metadata.
# Materialize the result: list_tables() returns a one-shot iterator,
# and this example iterates it twice.
dataset_id = f"{client.project}.transactions"
tables = list(client.list_tables(dataset_id))

print("Tables in dataset:")
for table_item in tables:
    table = client.get_table(table_item.reference)
    size_gb = table.num_bytes / (1024**3) if table.num_bytes else 0

    print(f"  {table.table_id}")
    print(f"    Rows: {table.num_rows:,}")
    print(f"    Size: {size_gb:.2f} GB")
    print(f"    Modified: {table.modified}")

    if table.expires:
        print(f"    Expires: {table.expires}")

# Clean up old tables. Use a timezone-aware cutoff because
# table.modified returned by the API is timezone-aware (UTC).
cutoff_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=30)

for table_item in tables:
    table = client.get_table(table_item.reference)
    if table.modified < cutoff_date and table.table_id.startswith("temp_"):
        print(f"Deleting old temp table: {table.table_id}")
        client.delete_table(table.reference, not_found_ok=True)

# Update table metadata
table = client.get_table(f"{client.project}.transactions.daily_transactions")
table.description = "Updated: Daily transaction records with enhanced metadata"
table.labels.update({"last_updated": "2023-12-01"})

table = client.update_table(table, ["description", "labels"])
print("Updated table metadata")
```