0
# Schema Definition
1
2
Type-safe schema definition with field specifications, modes, and descriptions. BigQuery schemas define the structure of tables and query results, ensuring data integrity and enabling proper type handling across the API.
3
4
## Capabilities
5
6
### Schema Field Definition
7
8
Define individual fields in a BigQuery table schema with complete type information and constraints.
9
10
```python { .api }
11
class SchemaField:
12
def __init__(
13
self,
14
name: str,
15
field_type: str,
16
mode: str = "NULLABLE",
17
description: str = None,
18
fields: Tuple[SchemaField, ...] = (),
19
policy_tags: PolicyTagList = None,
20
precision: int = None,
21
scale: int = None,
22
max_length: int = None,
23
default_value_expression: str = None,
24
):
25
"""
26
Define a BigQuery table schema field.
27
28
Args:
29
name: Field name.
30
field_type: BigQuery data type (STRING, INTEGER, FLOAT, etc.).
31
mode: Field mode (NULLABLE, REQUIRED, REPEATED).
32
description: Field description.
33
fields: Nested fields for RECORD types.
34
policy_tags: Policy tags for column-level security.
35
precision: Precision for NUMERIC/BIGNUMERIC types.
36
scale: Scale for NUMERIC/BIGNUMERIC types.
37
max_length: Maximum length for STRING/BYTES types.
38
default_value_expression: Default value expression.
39
"""
40
41
@property
42
def name(self) -> str:
43
"""Field name."""
44
45
@property
46
def field_type(self) -> str:
47
"""BigQuery data type."""
48
49
@property
50
def mode(self) -> str:
51
"""Field mode (NULLABLE, REQUIRED, REPEATED)."""
52
53
@property
54
def description(self) -> str:
55
"""Field description."""
56
57
@property
58
def fields(self) -> Tuple[SchemaField, ...]:
59
"""Nested fields for RECORD types."""
60
61
@property
62
def is_nullable(self) -> bool:
63
"""True if field can contain NULL values."""
64
65
@property
66
def is_repeated(self) -> bool:
67
"""True if field is repeated (array)."""
68
69
def to_api_repr(self) -> Dict[str, Any]:
70
"""Convert to API representation."""
71
72
@classmethod
73
def from_api_repr(cls, api_repr: Dict[str, Any]) -> SchemaField:
74
"""Create from API representation."""
75
```
76
77
### Policy Tags for Column-Level Security
78
79
Apply policy tags for column-level access control and data governance.
80
81
```python { .api }
82
class PolicyTagList:
83
def __init__(self, names: List[str] = None):
84
"""
85
List of policy tag resource names for column-level security.
86
87
Args:
88
names: Policy tag resource names.
89
"""
90
91
@property
92
def names(self) -> List[str]:
93
"""Policy tag resource names."""
94
95
@names.setter
96
def names(self, value: List[str]): ...
97
```
98
99
### Field Element Types
100
101
Define element types for repeated (array) fields with proper type constraints.
102
103
```python { .api }
104
class FieldElementType:
105
def __init__(self, element_type: str):
106
"""
107
Element type for repeated fields.
108
109
Args:
110
element_type: BigQuery data type for array elements.
111
"""
112
113
@property
114
def element_type(self) -> str:
115
"""Element type for array fields."""
116
```
117
118
### Standard SQL Type System
119
120
Represent Standard SQL types for advanced type definitions and compatibility.
121
122
```python { .api }
123
class StandardSqlDataType:
124
def __init__(self, **kwargs): ...
125
126
@property
127
def type_kind(self) -> str:
128
"""Type kind (INT64, STRING, ARRAY, STRUCT, etc.)."""
129
130
@property
131
def array_element_type(self) -> StandardSqlDataType:
132
"""Element type for ARRAY types."""
133
134
@property
135
def struct_type(self) -> StandardSqlStructType:
136
"""Struct definition for STRUCT types."""
137
138
class StandardSqlField:
139
def __init__(self, name: str = None, type: StandardSqlDataType = None):
140
"""
141
Field in Standard SQL type system.
142
143
Args:
144
name: Field name.
145
type: Field data type.
146
"""
147
148
@property
149
def name(self) -> str:
150
"""Field name."""
151
152
@property
153
def type(self) -> StandardSqlDataType:
154
"""Field data type."""
155
156
class StandardSqlStructType:
157
def __init__(self, fields: List[StandardSqlField] = None):
158
"""
159
Struct type definition in Standard SQL.
160
161
Args:
162
fields: List of struct fields.
163
"""
164
165
@property
166
def fields(self) -> List[StandardSqlField]:
167
"""Struct fields."""
168
169
class StandardSqlTableType:
170
def __init__(self, columns: List[StandardSqlField] = None):
171
"""
172
Table type definition in Standard SQL.
173
174
Args:
175
columns: List of table columns.
176
"""
177
178
@property
179
def columns(self) -> List[StandardSqlField]:
180
"""Table columns."""
181
```
182
183
## Data Types Reference
184
185
### Basic Types
186
187
```python { .api }
188
# String types
189
STRING_TYPE = "STRING" # Variable-length character string
190
BYTES_TYPE = "BYTES" # Variable-length byte sequence
191
192
# Numeric types
193
INTEGER_TYPE = "INTEGER" # 64-bit signed integer
194
INT64_TYPE = "INT64" # 64-bit signed integer (preferred)
195
FLOAT_TYPE = "FLOAT" # IEEE 754 double-precision
196
FLOAT64_TYPE = "FLOAT64" # IEEE 754 double-precision (preferred)
197
NUMERIC_TYPE = "NUMERIC" # High-precision decimal (38 digits, 9 after decimal)
198
BIGNUMERIC_TYPE = "BIGNUMERIC" # High-precision decimal (76 digits, 38 after decimal)
199
200
# Boolean type
201
BOOLEAN_TYPE = "BOOLEAN" # True/False values
202
BOOL_TYPE = "BOOL" # True/False values (preferred)
203
204
# Date and time types
205
DATE_TYPE = "DATE" # Calendar date (YYYY-MM-DD)
206
DATETIME_TYPE = "DATETIME" # Date and time (YYYY-MM-DD HH:MM:SS)
207
TIME_TYPE = "TIME" # Time of day (HH:MM:SS)
208
TIMESTAMP_TYPE = "TIMESTAMP" # Absolute point in time with timezone
209
210
# Geographic type
211
GEOGRAPHY_TYPE = "GEOGRAPHY" # Geographic data (points, lines, polygons)
212
213
# JSON type
214
JSON_TYPE = "JSON" # JSON document
215
216
# Complex types
217
RECORD_TYPE = "RECORD" # Nested structure
218
STRUCT_TYPE = "STRUCT" # Nested structure (preferred)
219
```
220
221
### Field Modes
222
223
```python { .api }
224
NULLABLE_MODE = "NULLABLE" # Field can be NULL (default)
225
REQUIRED_MODE = "REQUIRED" # Field cannot be NULL
226
REPEATED_MODE = "REPEATED" # Field is an array
227
```
228
229
## Usage Examples
230
231
### Basic Schema Definition
232
233
```python
234
from google.cloud.bigquery import SchemaField
235
236
# Simple table schema
237
schema = [
238
SchemaField("id", "INTEGER", mode="REQUIRED"),
239
SchemaField("name", "STRING", mode="REQUIRED"),
240
SchemaField("email", "STRING", mode="NULLABLE"),
241
SchemaField("age", "INTEGER", mode="NULLABLE"),
242
SchemaField("is_active", "BOOLEAN", mode="NULLABLE"),
243
SchemaField("created_at", "TIMESTAMP", mode="REQUIRED"),
244
]
245
246
# Create table with schema
247
table = bigquery.Table(f"{project_id}.{dataset_id}.{table_id}", schema=schema)
248
table = client.create_table(table)
249
```
250
251
### Complex Schema with Nested Fields
252
253
```python
254
# Schema with nested RECORD fields
255
schema = [
256
SchemaField("user_id", "INTEGER", mode="REQUIRED"),
257
SchemaField("profile", "RECORD", mode="NULLABLE", fields=[
258
SchemaField("first_name", "STRING", mode="REQUIRED"),
259
SchemaField("last_name", "STRING", mode="REQUIRED"),
260
SchemaField("address", "RECORD", mode="NULLABLE", fields=[
261
SchemaField("street", "STRING", mode="NULLABLE"),
262
SchemaField("city", "STRING", mode="NULLABLE"),
263
SchemaField("state", "STRING", mode="NULLABLE"),
264
SchemaField("zip_code", "STRING", mode="NULLABLE"),
265
]),
266
]),
267
SchemaField("tags", "STRING", mode="REPEATED"), # Array of strings
268
SchemaField("scores", "FLOAT", mode="REPEATED"), # Array of floats
269
]
270
```
271
272
### Schema with Advanced Types
273
274
```python
275
# Schema with precision, scale, and policy tags
276
from google.cloud.bigquery import PolicyTagList
277
278
policy_tags = PolicyTagList(names=[
279
"projects/my-project/locations/us/taxonomies/123/policyTags/456"
280
])
281
282
schema = [
283
SchemaField("transaction_id", "STRING", mode="REQUIRED"),
284
SchemaField("amount", "NUMERIC", mode="REQUIRED",
285
precision=10, scale=2, description="Transaction amount in USD"),
286
SchemaField("customer_ssn", "STRING", mode="NULLABLE",
287
policy_tags=policy_tags, description="Customer SSN (PII)"),
288
SchemaField("metadata", "JSON", mode="NULLABLE"),
289
SchemaField("location", "GEOGRAPHY", mode="NULLABLE"),
290
SchemaField("event_timestamp", "TIMESTAMP", mode="REQUIRED",
291
default_value_expression="CURRENT_TIMESTAMP()"),
292
]
293
```
294
295
### Working with Schema Evolution
296
297
```python
298
# Get existing table schema
299
table = client.get_table(f"{project_id}.{dataset_id}.{table_id}")
300
current_schema = table.schema
301
302
# Add new fields (schema evolution)
303
new_schema = list(current_schema)
304
new_schema.extend([
305
SchemaField("new_column", "STRING", mode="NULLABLE"),
306
SchemaField("calculated_field", "FLOAT", mode="NULLABLE"),
307
])
308
309
# Update table schema
310
table.schema = new_schema
311
table = client.update_table(table, ["schema"])
312
```
313
314
### Schema Validation and Introspection
315
316
```python
317
def validate_schema(schema):
318
"""Validate schema definition."""
319
for field in schema:
320
print(f"Field: {field.name}")
321
print(f" Type: {field.field_type}")
322
print(f" Mode: {field.mode}")
323
print(f" Nullable: {field.is_nullable}")
324
print(f" Repeated: {field.is_repeated}")
325
326
if field.description:
327
print(f" Description: {field.description}")
328
329
if field.fields: # Nested fields
330
print(f" Nested fields:")
331
for nested_field in field.fields:
332
print(f" {nested_field.name}: {nested_field.field_type}")
333
334
# Load schema from JSON
335
import json
336
337
schema_json = [
338
{"name": "id", "type": "INTEGER", "mode": "REQUIRED"},
339
{"name": "name", "type": "STRING", "mode": "REQUIRED"},
340
{"name": "metadata", "type": "RECORD", "mode": "REPEATED", "fields": [
341
{"name": "key", "type": "STRING", "mode": "REQUIRED"},
342
{"name": "value", "type": "STRING", "mode": "NULLABLE"}
343
]}
344
]
345
346
schema = [SchemaField.from_api_repr(field) for field in schema_json]
347
validate_schema(schema)
348
```
349
350
### Schema Compatibility Checking
351
352
```python
353
def check_schema_compatibility(old_schema, new_schema):
354
"""Check if new schema is compatible with old schema."""
355
old_fields = {field.name: field for field in old_schema}
356
new_fields = {field.name: field for field in new_schema}
357
358
# Check for removed fields
359
removed_fields = set(old_fields.keys()) - set(new_fields.keys())
360
if removed_fields:
361
print(f"Warning: Fields removed: {removed_fields}")
362
363
# Check for type changes
364
for field_name in old_fields:
365
if field_name in new_fields:
366
old_field = old_fields[field_name]
367
new_field = new_fields[field_name]
368
369
if old_field.field_type != new_field.field_type:
370
print(f"Warning: Type changed for {field_name}: "
371
f"{old_field.field_type} -> {new_field.field_type}")
372
373
if old_field.mode == "REQUIRED" and new_field.mode != "REQUIRED":
374
print(f"OK: Field {field_name} changed from REQUIRED to {new_field.mode}")
375
376
if old_field.mode != "REQUIRED" and new_field.mode == "REQUIRED":
377
print(f"Error: Field {field_name} changed from {old_field.mode} to REQUIRED")
378
379
# Check for new fields
380
new_field_names = set(new_fields.keys()) - set(old_fields.keys())
381
for field_name in new_field_names:
382
field = new_fields[field_name]
383
if field.mode == "REQUIRED":
384
print(f"Error: New REQUIRED field added: {field_name}")
385
else:
386
print(f"OK: New optional field added: {field_name}")
387
```