Google BigQuery API client library for Python providing comprehensive data warehouse and analytics capabilities
—
Type-safe schema definition with field specifications, modes, and descriptions. BigQuery schemas define the structure of tables and query results, ensuring data integrity and enabling proper type handling across the API.
Define individual fields in a BigQuery table schema with complete type information and constraints.
class SchemaField:
def __init__(
self,
name: str,
field_type: str,
mode: str = "NULLABLE",
description: str = None,
fields: Tuple[SchemaField, ...] = (),
policy_tags: PolicyTagList = None,
precision: int = None,
scale: int = None,
max_length: int = None,
default_value_expression: str = None,
):
"""
Define a BigQuery table schema field.
Args:
name: Field name.
field_type: BigQuery data type (STRING, INTEGER, FLOAT, etc.).
mode: Field mode (NULLABLE, REQUIRED, REPEATED).
description: Field description.
fields: Nested fields for RECORD types.
policy_tags: Policy tags for column-level security.
precision: Precision for NUMERIC/BIGNUMERIC types.
scale: Scale for NUMERIC/BIGNUMERIC types.
max_length: Maximum length for STRING/BYTES types.
default_value_expression: Default value expression.
"""
@property
def name(self) -> str:
"""Field name."""
@property
def field_type(self) -> str:
"""BigQuery data type."""
@property
def mode(self) -> str:
"""Field mode (NULLABLE, REQUIRED, REPEATED)."""
@property
def description(self) -> str:
"""Field description."""
@property
def fields(self) -> Tuple[SchemaField, ...]:
"""Nested fields for RECORD types."""
@property
def is_nullable(self) -> bool:
"""True if field can contain NULL values."""
@property
def is_repeated(self) -> bool:
"""True if field is repeated (array)."""
def to_api_repr(self) -> Dict[str, Any]:
"""Convert to API representation."""
@classmethod
def from_api_repr(cls, api_repr: Dict[str, Any]) -> SchemaField:
"""Create from API representation."""Apply policy tags for column-level access control and data governance.
class PolicyTagList:
def __init__(self, names: List[str] = None):
"""
List of policy tag resource names for column-level security.
Args:
names: Policy tag resource names.
"""
@property
def names(self) -> List[str]:
"""Policy tag resource names."""
@names.setter
def names(self, value: List[str]): ...Define element types for repeated (array) fields with proper type constraints.
class FieldElementType:
def __init__(self, element_type: str):
"""
Element type for repeated fields.
Args:
element_type: BigQuery data type for array elements.
"""
@property
def element_type(self) -> str:
"""Element type for array fields."""Represent Standard SQL types for advanced type definitions and compatibility.
class StandardSqlDataType:
def __init__(self, **kwargs): ...
@property
def type_kind(self) -> str:
"""Type kind (INT64, STRING, ARRAY, STRUCT, etc.)."""
@property
def array_element_type(self) -> StandardSqlDataType:
"""Element type for ARRAY types."""
@property
def struct_type(self) -> StandardSqlStructType:
"""Struct definition for STRUCT types."""
class StandardSqlField:
def __init__(self, name: str = None, type: StandardSqlDataType = None):
"""
Field in Standard SQL type system.
Args:
name: Field name.
type: Field data type.
"""
@property
def name(self) -> str:
"""Field name."""
@property
def type(self) -> StandardSqlDataType:
"""Field data type."""
class StandardSqlStructType:
def __init__(self, fields: List[StandardSqlField] = None):
"""
Struct type definition in Standard SQL.
Args:
fields: List of struct fields.
"""
@property
def fields(self) -> List[StandardSqlField]:
"""Struct fields."""
class StandardSqlTableType:
def __init__(self, columns: List[StandardSqlField] = None):
"""
Table type definition in Standard SQL.
Args:
columns: List of table columns.
"""
@property
def columns(self) -> List[StandardSqlField]:
"""Table columns."""# String types
STRING_TYPE = "STRING" # Variable-length character string
BYTES_TYPE = "BYTES" # Variable-length byte sequence
# Numeric types
INTEGER_TYPE = "INTEGER" # 64-bit signed integer
INT64_TYPE = "INT64" # 64-bit signed integer (preferred)
FLOAT_TYPE = "FLOAT" # IEEE 754 double-precision
FLOAT64_TYPE = "FLOAT64" # IEEE 754 double-precision (preferred)
NUMERIC_TYPE = "NUMERIC" # High-precision decimal (38 digits, 9 after decimal)
BIGNUMERIC_TYPE = "BIGNUMERIC" # High-precision decimal (76 digits, 38 after decimal)
# Boolean type
BOOLEAN_TYPE = "BOOLEAN" # True/False values
BOOL_TYPE = "BOOL" # True/False values (preferred)
# Date and time types
DATE_TYPE = "DATE" # Calendar date (YYYY-MM-DD)
DATETIME_TYPE = "DATETIME" # Date and time (YYYY-MM-DD HH:MM:SS)
TIME_TYPE = "TIME" # Time of day (HH:MM:SS)
TIMESTAMP_TYPE = "TIMESTAMP" # Absolute point in time with timezone
# Geographic type
GEOGRAPHY_TYPE = "GEOGRAPHY" # Geographic data (points, lines, polygons)
# JSON type
JSON_TYPE = "JSON" # JSON document
# Complex types
RECORD_TYPE = "RECORD" # Nested structure
STRUCT_TYPE = "STRUCT" # Nested structure (preferred)NULLABLE_MODE = "NULLABLE" # Field can be NULL (default)
REQUIRED_MODE = "REQUIRED" # Field cannot be NULL
REPEATED_MODE = "REPEATED" # Field is an arrayfrom google.cloud.bigquery import SchemaField
# Simple table schema
schema = [
SchemaField("id", "INTEGER", mode="REQUIRED"),
SchemaField("name", "STRING", mode="REQUIRED"),
SchemaField("email", "STRING", mode="NULLABLE"),
SchemaField("age", "INTEGER", mode="NULLABLE"),
SchemaField("is_active", "BOOLEAN", mode="NULLABLE"),
SchemaField("created_at", "TIMESTAMP", mode="REQUIRED"),
]
# Create table with schema
table = bigquery.Table(f"{project_id}.{dataset_id}.{table_id}", schema=schema)
table = client.create_table(table)# Schema with nested RECORD fields
schema = [
SchemaField("user_id", "INTEGER", mode="REQUIRED"),
SchemaField("profile", "RECORD", mode="NULLABLE", fields=[
SchemaField("first_name", "STRING", mode="REQUIRED"),
SchemaField("last_name", "STRING", mode="REQUIRED"),
SchemaField("address", "RECORD", mode="NULLABLE", fields=[
SchemaField("street", "STRING", mode="NULLABLE"),
SchemaField("city", "STRING", mode="NULLABLE"),
SchemaField("state", "STRING", mode="NULLABLE"),
SchemaField("zip_code", "STRING", mode="NULLABLE"),
]),
]),
SchemaField("tags", "STRING", mode="REPEATED"), # Array of strings
SchemaField("scores", "FLOAT", mode="REPEATED"), # Array of floats
]# Schema with precision, scale, and policy tags
from google.cloud.bigquery import PolicyTagList
policy_tags = PolicyTagList(names=[
"projects/my-project/locations/us/taxonomies/123/policyTags/456"
])
schema = [
SchemaField("transaction_id", "STRING", mode="REQUIRED"),
SchemaField("amount", "NUMERIC", mode="REQUIRED",
precision=10, scale=2, description="Transaction amount in USD"),
SchemaField("customer_ssn", "STRING", mode="NULLABLE",
policy_tags=policy_tags, description="Customer SSN (PII)"),
SchemaField("metadata", "JSON", mode="NULLABLE"),
SchemaField("location", "GEOGRAPHY", mode="NULLABLE"),
SchemaField("event_timestamp", "TIMESTAMP", mode="REQUIRED",
default_value_expression="CURRENT_TIMESTAMP()"),
]# Get existing table schema
table = client.get_table(f"{project_id}.{dataset_id}.{table_id}")
current_schema = table.schema
# Add new fields (schema evolution)
new_schema = list(current_schema)
new_schema.extend([
SchemaField("new_column", "STRING", mode="NULLABLE"),
SchemaField("calculated_field", "FLOAT", mode="NULLABLE"),
])
# Update table schema
table.schema = new_schema
table = client.update_table(table, ["schema"])def validate_schema(schema):
"""Validate schema definition."""
for field in schema:
print(f"Field: {field.name}")
print(f" Type: {field.field_type}")
print(f" Mode: {field.mode}")
print(f" Nullable: {field.is_nullable}")
print(f" Repeated: {field.is_repeated}")
if field.description:
print(f" Description: {field.description}")
if field.fields: # Nested fields
print(f" Nested fields:")
for nested_field in field.fields:
print(f" {nested_field.name}: {nested_field.field_type}")
# Load schema from JSON
import json
schema_json = [
{"name": "id", "type": "INTEGER", "mode": "REQUIRED"},
{"name": "name", "type": "STRING", "mode": "REQUIRED"},
{"name": "metadata", "type": "RECORD", "mode": "REPEATED", "fields": [
{"name": "key", "type": "STRING", "mode": "REQUIRED"},
{"name": "value", "type": "STRING", "mode": "NULLABLE"}
]}
]
schema = [SchemaField.from_api_repr(field) for field in schema_json]
validate_schema(schema)def check_schema_compatibility(old_schema, new_schema):
"""Check if new schema is compatible with old schema."""
old_fields = {field.name: field for field in old_schema}
new_fields = {field.name: field for field in new_schema}
# Check for removed fields
removed_fields = set(old_fields.keys()) - set(new_fields.keys())
if removed_fields:
print(f"Warning: Fields removed: {removed_fields}")
# Check for type changes
for field_name in old_fields:
if field_name in new_fields:
old_field = old_fields[field_name]
new_field = new_fields[field_name]
if old_field.field_type != new_field.field_type:
print(f"Warning: Type changed for {field_name}: "
f"{old_field.field_type} -> {new_field.field_type}")
if old_field.mode == "REQUIRED" and new_field.mode != "REQUIRED":
print(f"OK: Field {field_name} changed from REQUIRED to {new_field.mode}")
if old_field.mode != "REQUIRED" and new_field.mode == "REQUIRED":
print(f"Error: Field {field_name} changed from {old_field.mode} to REQUIRED")
# Check for new fields
new_field_names = set(new_fields.keys()) - set(old_fields.keys())
for field_name in new_field_names:
field = new_fields[field_name]
if field.mode == "REQUIRED":
print(f"Error: New REQUIRED field added: {field_name}")
else:
print(f"OK: New optional field added: {field_name}")Install with Tessl CLI
npx tessl i tessl/pypi-google-cloud-bigquery