# Table Operations

Table creation, schema management, data operations, and metadata management. BigQuery tables store structured data with enforced schemas and support various configurations including partitioning, clustering, and expiration policies.

## Capabilities

### Table Definition

Create and configure BigQuery tables with schemas, partitioning, and metadata.

```python { .api }
class Table:
    def __init__(
        self,
        table_ref: Union[TableReference, str],
        schema: List[SchemaField] = None
    ):
        """
        Initialize a Table.

        Args:
            table_ref: Table reference string or TableReference object.
            schema: Table schema as list of SchemaField objects.
        """

    @property
    def reference(self) -> TableReference:
        """Table reference object."""

    @property
    def table_id(self) -> str:
        """Table ID."""

    @property
    def dataset_id(self) -> str:
        """Dataset ID containing the table."""

    @property
    def project(self) -> str:
        """Project ID containing the table."""

    @property
    def schema(self) -> List[SchemaField]:
        """Table schema."""

    @schema.setter
    def schema(self, value: List[SchemaField]): ...

    @property
    def friendly_name(self) -> str:
        """Human-readable table name."""

    @friendly_name.setter
    def friendly_name(self, value: str): ...

    @property
    def description(self) -> str:
        """Table description."""

    @description.setter
    def description(self, value: str): ...

    @property
    def num_bytes(self) -> int:
        """Size of table in bytes."""

    @property
    def num_rows(self) -> int:
        """Number of rows in table."""

    @property
    def created(self) -> datetime.datetime:
        """Table creation timestamp."""

    @property
    def modified(self) -> datetime.datetime:
        """Table last modification timestamp."""

    @property
    def expires(self) -> datetime.datetime:
        """Table expiration timestamp."""

    @expires.setter
    def expires(self, value: datetime.datetime): ...

    @property
    def labels(self) -> Dict[str, str]:
        """Labels for organizing tables."""

    @labels.setter
    def labels(self, value: Dict[str, str]): ...

    @property
    def time_partitioning(self) -> TimePartitioning:
        """Time partitioning configuration."""

    @time_partitioning.setter
    def time_partitioning(self, value: TimePartitioning): ...

    @property
    def range_partitioning(self) -> RangePartitioning:
        """Range partitioning configuration."""

    @range_partitioning.setter
    def range_partitioning(self, value: RangePartitioning): ...

    @property
    def clustering_fields(self) -> List[str]:
        """Fields used for clustering."""

    @clustering_fields.setter
    def clustering_fields(self, value: List[str]): ...

    @property
    def require_partition_filter(self) -> bool:
        """Require partition filter in queries."""

    @require_partition_filter.setter
    def require_partition_filter(self, value: bool): ...
```

### Table Reference

Reference tables by project, dataset, and table ID for API operations.

```python { .api }
class TableReference:
    def __init__(self, dataset_ref: DatasetReference, table_id: str):
        """
        Reference to a BigQuery table.

        Args:
            dataset_ref: Dataset reference containing the table.
            table_id: Table ID.
        """

    @property
    def dataset_id(self) -> str:
        """Dataset ID."""

    @property
    def project(self) -> str:
        """Project ID."""

    @property
    def table_id(self) -> str:
        """Table ID."""

    @property
    def path(self) -> str:
        """Full table path (project:dataset.table)."""

    @classmethod
    def from_string(
        cls,
        table_id: str,
        default_project: str = None
    ) -> TableReference:
        """
        Create TableReference from string.

        Args:
            table_id: Table ID with optional project and dataset.
            default_project: Default project if not specified.

        Returns:
            TableReference: Table reference object.
        """
```

### Partitioning Configuration

Configure time-based and range-based table partitioning for query performance and cost optimization.

```python { .api }
class TimePartitioning:
    def __init__(
        self,
        type_: str = None,
        field: str = None,
        expiration_ms: int = None,
        require_partition_filter: bool = None,
    ):
        """
        Time-based partitioning configuration.

        Args:
            type_: Partitioning type (DAY, HOUR, MONTH, YEAR).
            field: Field to partition by (None for ingestion time).
            expiration_ms: Partition expiration in milliseconds.
            require_partition_filter: Require partition filter in queries.
        """

    @property
    def type_(self) -> str:
        """Partitioning type."""

    @property
    def field(self) -> str:
        """Partitioning field."""

    @property
    def expiration_ms(self) -> int:
        """Partition expiration in milliseconds."""

    @property
    def require_partition_filter(self) -> bool:
        """Require partition filter in queries."""

class RangePartitioning:
    def __init__(self, field: str = None, range_: PartitionRange = None):
        """
        Range-based partitioning configuration.

        Args:
            field: Field to partition by.
            range_: Range configuration for partitioning.
        """

    @property
    def field(self) -> str:
        """Partitioning field."""

    @property
    def range_(self) -> PartitionRange:
        """Range configuration."""

class PartitionRange:
    def __init__(self, start: int = None, end: int = None, interval: int = None):
        """
        Range definition for range partitioning.

        Args:
            start: Start of range (inclusive).
            end: End of range (exclusive).
            interval: Interval between partitions.
        """

    @property
    def start(self) -> int:
        """Range start (inclusive)."""

    @property
    def end(self) -> int:
        """Range end (exclusive)."""

    @property
    def interval(self) -> int:
        """Partition interval."""
```

### Table Types and Views

Support for different table types including views, materialized views, and external tables.

```python { .api }
# View configuration
@property
def view_query(self) -> str:
    """SQL query defining the view."""

@view_query.setter
def view_query(self, value: str): ...

@property
def view_use_legacy_sql(self) -> bool:
    """Use legacy SQL for view query."""

@view_use_legacy_sql.setter
def view_use_legacy_sql(self, value: bool): ...

# Materialized view configuration
@property
def mview_query(self) -> str:
    """SQL query defining the materialized view."""

@mview_query.setter
def mview_query(self, value: str): ...

@property
def mview_enable_refresh(self) -> bool:
    """Enable automatic refresh of materialized view."""

@mview_enable_refresh.setter
def mview_enable_refresh(self, value: bool): ...

@property
def mview_refresh_interval_ms(self) -> int:
    """Refresh interval in milliseconds."""

@mview_refresh_interval_ms.setter
def mview_refresh_interval_ms(self, value: int): ...

# External table configuration
@property
def external_data_configuration(self) -> ExternalConfig:
    """External data source configuration."""

@external_data_configuration.setter
def external_data_configuration(self, value: ExternalConfig): ...
```

## Usage Examples

### Create Standard Table

```python
import datetime

from google.cloud import bigquery

client = bigquery.Client()

# Define table schema
schema = [
    bigquery.SchemaField("transaction_id", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("user_id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("amount", "NUMERIC", mode="REQUIRED", precision=10, scale=2),
    bigquery.SchemaField("currency", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("transaction_date", "DATE", mode="REQUIRED"),
    bigquery.SchemaField("created_at", "TIMESTAMP", mode="REQUIRED"),
    bigquery.SchemaField("metadata", "JSON", mode="NULLABLE"),
]

# Create table
table_id = f"{client.project}.transactions.daily_transactions"
table = bigquery.Table(table_id, schema=schema)

# Configure table properties
table.friendly_name = "Daily Transactions"
table.description = "Daily transaction records with user and payment information"
table.labels = {
    "environment": "production",
    "data_type": "financial"
}

# Set expiration (90 days)
table.expires = datetime.datetime.now() + datetime.timedelta(days=90)

# Create the table
table = client.create_table(table, exists_ok=True)
print(f"Created table {table.table_id}")
```

### Create Partitioned Table

```python
# Create time-partitioned table
schema = [
    bigquery.SchemaField("event_id", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("user_id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("event_type", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("event_timestamp", "TIMESTAMP", mode="REQUIRED"),
    bigquery.SchemaField("properties", "JSON", mode="NULLABLE"),
]

table = bigquery.Table(f"{client.project}.analytics.events", schema=schema)

# Configure time partitioning by event_timestamp field
table.time_partitioning = bigquery.TimePartitioning(
    type_=bigquery.TimePartitioningType.DAY,
    field="event_timestamp",
    expiration_ms=30 * 24 * 60 * 60 * 1000,  # 30 days
    require_partition_filter=True
)

# Add clustering for better query performance
table.clustering_fields = ["event_type", "user_id"]

table = client.create_table(table, exists_ok=True)
print(f"Created partitioned table {table.table_id}")
```

### Create Range-Partitioned Table

```python
# Create range-partitioned table
schema = [
    bigquery.SchemaField("customer_id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("region_code", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("customer_name", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("signup_date", "DATE", mode="REQUIRED"),
]

table = bigquery.Table(f"{client.project}.customers.customers_by_region", schema=schema)

# Configure range partitioning by region_code
table.range_partitioning = bigquery.RangePartitioning(
    field="region_code",
    range_=bigquery.PartitionRange(start=0, end=1000, interval=100)
)

table = client.create_table(table, exists_ok=True)
print(f"Created range-partitioned table {table.table_id}")
```

### Create View

```python
# Create a view
view_query = """
SELECT
    t.transaction_id,
    t.user_id,
    t.amount,
    t.currency,
    t.transaction_date,
    u.user_name,
    u.email
FROM `{}.transactions.daily_transactions` t
JOIN `{}.users.user_profiles` u ON t.user_id = u.user_id
WHERE t.transaction_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY)
""".format(client.project, client.project)

view = bigquery.Table(f"{client.project}.reports.recent_transactions_with_users")
view.view_query = view_query
view.description = "Recent transactions joined with user information"

view = client.create_table(view, exists_ok=True)
print(f"Created view {view.table_id}")
```

### Create Materialized View

```python
# Create materialized view for aggregated data
mview_query = """
SELECT
    DATE(created_at) as transaction_date,
    currency,
    COUNT(*) as transaction_count,
    SUM(amount) as total_amount,
    AVG(amount) as avg_amount
FROM `{}.transactions.daily_transactions`
GROUP BY DATE(created_at), currency
""".format(client.project)

mview = bigquery.Table(f"{client.project}.reports.daily_transaction_summary")
mview.mview_query = mview_query
mview.mview_enable_refresh = True
mview.mview_refresh_interval_ms = 60 * 60 * 1000  # 1 hour

mview = client.create_table(mview, exists_ok=True)
print(f"Created materialized view {mview.table_id}")
```

### Table Schema Evolution

```python
# Get existing table
table = client.get_table(f"{client.project}.transactions.daily_transactions")
current_schema = table.schema

# Add new fields to schema
new_schema = list(current_schema)
new_schema.extend([
    bigquery.SchemaField("payment_method", "STRING", mode="NULLABLE"),
    bigquery.SchemaField("merchant_id", "INTEGER", mode="NULLABLE"),
    bigquery.SchemaField("risk_score", "FLOAT", mode="NULLABLE"),
])

# Update table schema
table.schema = new_schema
table = client.update_table(table, ["schema"])
print(f"Updated schema for {table.table_id}")

# Verify schema changes
updated_table = client.get_table(table.reference)
print("New schema fields:")
for field in updated_table.schema:
    if field.name not in [f.name for f in current_schema]:
        print(f"  {field.name}: {field.field_type} ({field.mode})")
```

### Table Data Operations

```python
# Get table metadata and statistics
table = client.get_table(f"{client.project}.transactions.daily_transactions")

print(f"Table: {table.table_id}")
print(f"Size: {table.num_bytes:,} bytes ({table.num_bytes / 1024**3:.2f} GB)")
print(f"Rows: {table.num_rows:,}")
print(f"Created: {table.created}")
print(f"Modified: {table.modified}")

if table.time_partitioning:
    print(f"Partitioned by: {table.time_partitioning.field or 'ingestion time'}")
    print(f"Partition type: {table.time_partitioning.type_}")

if table.clustering_fields:
    print(f"Clustered by: {', '.join(table.clustering_fields)}")

# Copy table
source_table = f"{client.project}.transactions.daily_transactions"
dest_table = f"{client.project}.backups.daily_transactions_backup"

job_config = bigquery.CopyJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE
)

copy_job = client.copy_table(source_table, dest_table, job_config=job_config)
copy_job.result()  # Wait for completion

print(f"Copied {source_table} to {dest_table}")
```

### Table Maintenance

```python
# List tables in dataset with metadata.
# Materialize the result: list_tables() returns a one-shot iterator,
# and this example iterates it twice.
dataset_id = f"{client.project}.transactions"
tables = list(client.list_tables(dataset_id))

print("Tables in dataset:")
for table_item in tables:
    table = client.get_table(table_item.reference)
    size_gb = table.num_bytes / (1024**3) if table.num_bytes else 0

    print(f"  {table.table_id}")
    print(f"    Rows: {table.num_rows:,}")
    print(f"    Size: {size_gb:.2f} GB")
    print(f"    Modified: {table.modified}")

    if table.expires:
        print(f"    Expires: {table.expires}")

# Clean up old tables. Use a timezone-aware cutoff because
# table.modified returned by the API is timezone-aware (UTC).
cutoff_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=30)

for table_item in tables:
    table = client.get_table(table_item.reference)
    if table.modified < cutoff_date and table.table_id.startswith("temp_"):
        print(f"Deleting old temp table: {table.table_id}")
        client.delete_table(table.reference, not_found_ok=True)

# Update table metadata
table = client.get_table(f"{client.project}.transactions.daily_transactions")
table.description = "Updated: Daily transaction records with enhanced metadata"
table.labels.update({"last_updated": "2023-12-01"})

table = client.update_table(table, ["description", "labels"])
print("Updated table metadata")
```