or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

client-operations.mddata-loading.mddatabase-api.mddataset-management.mdindex.mdmodels-routines.mdquery-operations.mdquery-parameters.mdschema-definition.mdtable-operations.md

schema-definition.mddocs/

0

# Schema Definition

1

2

Type-safe schema definition with field specifications, modes, and descriptions. BigQuery schemas define the structure of tables and query results, ensuring data integrity and enabling proper type handling across the API.

3

4

## Capabilities

5

6

### Schema Field Definition

7

8

Define individual fields in a BigQuery table schema with complete type information and constraints.

9

10

```python { .api }

11

class SchemaField:

12

def __init__(

13

self,

14

name: str,

15

field_type: str,

16

mode: str = "NULLABLE",

17

description: str = None,

18

fields: Tuple[SchemaField, ...] = (),

19

policy_tags: PolicyTagList = None,

20

precision: int = None,

21

scale: int = None,

22

max_length: int = None,

23

default_value_expression: str = None,

24

):

25

"""

26

Define a BigQuery table schema field.

27

28

Args:

29

name: Field name.

30

field_type: BigQuery data type (STRING, INTEGER, FLOAT, etc.).

31

mode: Field mode (NULLABLE, REQUIRED, REPEATED).

32

description: Field description.

33

fields: Nested fields for RECORD types.

34

policy_tags: Policy tags for column-level security.

35

precision: Precision for NUMERIC/BIGNUMERIC types.

36

scale: Scale for NUMERIC/BIGNUMERIC types.

37

max_length: Maximum length for STRING/BYTES types.

38

default_value_expression: Default value expression.

39

"""

40

41

@property

42

def name(self) -> str:

43

"""Field name."""

44

45

@property

46

def field_type(self) -> str:

47

"""BigQuery data type."""

48

49

@property

50

def mode(self) -> str:

51

"""Field mode (NULLABLE, REQUIRED, REPEATED)."""

52

53

@property

54

def description(self) -> str:

55

"""Field description."""

56

57

@property

58

def fields(self) -> Tuple[SchemaField, ...]:

59

"""Nested fields for RECORD types."""

60

61

@property

62

def is_nullable(self) -> bool:

63

"""True if field can contain NULL values."""

64

65

@property

66

def is_repeated(self) -> bool:

67

"""True if field is repeated (array)."""

68

69

def to_api_repr(self) -> Dict[str, Any]:

70

"""Convert to API representation."""

71

72

@classmethod

73

def from_api_repr(cls, api_repr: Dict[str, Any]) -> SchemaField:

74

"""Create from API representation."""

75

```

76

77

### Policy Tags for Column-Level Security

78

79

Apply policy tags for column-level access control and data governance.

80

81

```python { .api }

82

class PolicyTagList:

83

def __init__(self, names: List[str] = None):

84

"""

85

List of policy tag resource names for column-level security.

86

87

Args:

88

names: Policy tag resource names.

89

"""

90

91

@property

92

def names(self) -> List[str]:

93

"""Policy tag resource names."""

94

95

@names.setter

96

def names(self, value: List[str]): ...

97

```

98

99

### Field Element Types

100

101

Define element types for repeated (array) fields with proper type constraints.

102

103

```python { .api }

104

class FieldElementType:

105

def __init__(self, element_type: str):

106

"""

107

Element type for repeated fields.

108

109

Args:

110

element_type: BigQuery data type for array elements.

111

"""

112

113

@property

114

def element_type(self) -> str:

115

"""Element type for array fields."""

116

```

117

118

### Standard SQL Type System

119

120

Represent Standard SQL types for advanced type definitions and compatibility.

121

122

```python { .api }

123

class StandardSqlDataType:

124

def __init__(self, **kwargs): ...

125

126

@property

127

def type_kind(self) -> str:

128

"""Type kind (INT64, STRING, ARRAY, STRUCT, etc.)."""

129

130

@property

131

def array_element_type(self) -> StandardSqlDataType:

132

"""Element type for ARRAY types."""

133

134

@property

135

def struct_type(self) -> StandardSqlStructType:

136

"""Struct definition for STRUCT types."""

137

138

class StandardSqlField:

139

def __init__(self, name: str = None, type: StandardSqlDataType = None):

140

"""

141

Field in Standard SQL type system.

142

143

Args:

144

name: Field name.

145

type: Field data type.

146

"""

147

148

@property

149

def name(self) -> str:

150

"""Field name."""

151

152

@property

153

def type(self) -> StandardSqlDataType:

154

"""Field data type."""

155

156

class StandardSqlStructType:

157

def __init__(self, fields: List[StandardSqlField] = None):

158

"""

159

Struct type definition in Standard SQL.

160

161

Args:

162

fields: List of struct fields.

163

"""

164

165

@property

166

def fields(self) -> List[StandardSqlField]:

167

"""Struct fields."""

168

169

class StandardSqlTableType:

170

def __init__(self, columns: List[StandardSqlField] = None):

171

"""

172

Table type definition in Standard SQL.

173

174

Args:

175

columns: List of table columns.

176

"""

177

178

@property

179

def columns(self) -> List[StandardSqlField]:

180

"""Table columns."""

181

```

182

183

## Data Types Reference

184

185

### Basic Types

186

187

```python { .api }

188

# String types

189

STRING_TYPE = "STRING" # Variable-length character string

190

BYTES_TYPE = "BYTES" # Variable-length byte sequence

191

192

# Numeric types

193

INTEGER_TYPE = "INTEGER" # 64-bit signed integer

194

INT64_TYPE = "INT64" # 64-bit signed integer (preferred)

195

FLOAT_TYPE = "FLOAT" # IEEE 754 double-precision

196

FLOAT64_TYPE = "FLOAT64" # IEEE 754 double-precision (preferred)

197

NUMERIC_TYPE = "NUMERIC" # High-precision decimal (38 digits, 9 after decimal)

198

BIGNUMERIC_TYPE = "BIGNUMERIC" # High-precision decimal (76 digits, 38 after decimal)

199

200

# Boolean type

201

BOOLEAN_TYPE = "BOOLEAN" # True/False values

202

BOOL_TYPE = "BOOL" # True/False values (preferred)

203

204

# Date and time types

205

DATE_TYPE = "DATE" # Calendar date (YYYY-MM-DD)

206

DATETIME_TYPE = "DATETIME" # Date and time (YYYY-MM-DD HH:MM:SS)

207

TIME_TYPE = "TIME" # Time of day (HH:MM:SS)

208

TIMESTAMP_TYPE = "TIMESTAMP" # Absolute point in time with timezone

209

210

# Geographic type

211

GEOGRAPHY_TYPE = "GEOGRAPHY" # Geographic data (points, lines, polygons)

212

213

# JSON type

214

JSON_TYPE = "JSON" # JSON document

215

216

# Complex types

217

RECORD_TYPE = "RECORD" # Nested structure

218

STRUCT_TYPE = "STRUCT" # Nested structure (preferred)

219

```

220

221

### Field Modes

222

223

```python { .api }

224

NULLABLE_MODE = "NULLABLE" # Field can be NULL (default)

225

REQUIRED_MODE = "REQUIRED" # Field cannot be NULL

226

REPEATED_MODE = "REPEATED" # Field is an array

227

```

228

229

## Usage Examples

230

231

### Basic Schema Definition

232

233

```python

234

from google.cloud.bigquery import SchemaField

235

236

# Simple table schema

237

schema = [

238

SchemaField("id", "INTEGER", mode="REQUIRED"),

239

SchemaField("name", "STRING", mode="REQUIRED"),

240

SchemaField("email", "STRING", mode="NULLABLE"),

241

SchemaField("age", "INTEGER", mode="NULLABLE"),

242

SchemaField("is_active", "BOOLEAN", mode="NULLABLE"),

243

SchemaField("created_at", "TIMESTAMP", mode="REQUIRED"),

244

]

245

246

# Create table with schema

247

table = bigquery.Table(f"{project_id}.{dataset_id}.{table_id}", schema=schema)

248

table = client.create_table(table)

249

```

250

251

### Complex Schema with Nested Fields

252

253

```python

254

# Schema with nested RECORD fields

255

schema = [

256

SchemaField("user_id", "INTEGER", mode="REQUIRED"),

257

SchemaField("profile", "RECORD", mode="NULLABLE", fields=[

258

SchemaField("first_name", "STRING", mode="REQUIRED"),

259

SchemaField("last_name", "STRING", mode="REQUIRED"),

260

SchemaField("address", "RECORD", mode="NULLABLE", fields=[

261

SchemaField("street", "STRING", mode="NULLABLE"),

262

SchemaField("city", "STRING", mode="NULLABLE"),

263

SchemaField("state", "STRING", mode="NULLABLE"),

264

SchemaField("zip_code", "STRING", mode="NULLABLE"),

265

]),

266

]),

267

SchemaField("tags", "STRING", mode="REPEATED"), # Array of strings

268

SchemaField("scores", "FLOAT", mode="REPEATED"), # Array of floats

269

]

270

```

271

272

### Schema with Advanced Types

273

274

```python

275

# Schema with precision, scale, and policy tags

276

from google.cloud.bigquery import PolicyTagList

277

278

policy_tags = PolicyTagList(names=[

279

"projects/my-project/locations/us/taxonomies/123/policyTags/456"

280

])

281

282

schema = [

283

SchemaField("transaction_id", "STRING", mode="REQUIRED"),

284

SchemaField("amount", "NUMERIC", mode="REQUIRED",

285

precision=10, scale=2, description="Transaction amount in USD"),

286

SchemaField("customer_ssn", "STRING", mode="NULLABLE",

287

policy_tags=policy_tags, description="Customer SSN (PII)"),

288

SchemaField("metadata", "JSON", mode="NULLABLE"),

289

SchemaField("location", "GEOGRAPHY", mode="NULLABLE"),

290

SchemaField("event_timestamp", "TIMESTAMP", mode="REQUIRED",

291

default_value_expression="CURRENT_TIMESTAMP()"),

292

]

293

```

294

295

### Working with Schema Evolution

296

297

```python

298

# Get existing table schema

299

table = client.get_table(f"{project_id}.{dataset_id}.{table_id}")

300

current_schema = table.schema

301

302

# Add new fields (schema evolution)

303

new_schema = list(current_schema)

304

new_schema.extend([

305

SchemaField("new_column", "STRING", mode="NULLABLE"),

306

SchemaField("calculated_field", "FLOAT", mode="NULLABLE"),

307

])

308

309

# Update table schema

310

table.schema = new_schema

311

table = client.update_table(table, ["schema"])

312

```

313

314

### Schema Validation and Introspection

315

316

```python

317

def validate_schema(schema):

318

"""Validate schema definition."""

319

for field in schema:

320

print(f"Field: {field.name}")

321

print(f" Type: {field.field_type}")

322

print(f" Mode: {field.mode}")

323

print(f" Nullable: {field.is_nullable}")

324

print(f" Repeated: {field.is_repeated}")

325

326

if field.description:

327

print(f" Description: {field.description}")

328

329

if field.fields: # Nested fields

330

print(f" Nested fields:")

331

for nested_field in field.fields:

332

print(f" {nested_field.name}: {nested_field.field_type}")

333

334

# Load schema from JSON

335

import json

336

337

schema_json = [

338

{"name": "id", "type": "INTEGER", "mode": "REQUIRED"},

339

{"name": "name", "type": "STRING", "mode": "REQUIRED"},

340

{"name": "metadata", "type": "RECORD", "mode": "REPEATED", "fields": [

341

{"name": "key", "type": "STRING", "mode": "REQUIRED"},

342

{"name": "value", "type": "STRING", "mode": "NULLABLE"}

343

]}

344

]

345

346

schema = [SchemaField.from_api_repr(field) for field in schema_json]

347

validate_schema(schema)

348

```

349

350

### Schema Compatibility Checking

351

352

```python

353

def check_schema_compatibility(old_schema, new_schema):

354

"""Check if new schema is compatible with old schema."""

355

old_fields = {field.name: field for field in old_schema}

356

new_fields = {field.name: field for field in new_schema}

357

358

# Check for removed fields

359

removed_fields = set(old_fields.keys()) - set(new_fields.keys())

360

if removed_fields:

361

print(f"Warning: Fields removed: {removed_fields}")

362

363

# Check for type changes

364

for field_name in old_fields:

365

if field_name in new_fields:

366

old_field = old_fields[field_name]

367

new_field = new_fields[field_name]

368

369

if old_field.field_type != new_field.field_type:

370

print(f"Warning: Type changed for {field_name}: "

371

f"{old_field.field_type} -> {new_field.field_type}")

372

373

if old_field.mode == "REQUIRED" and new_field.mode != "REQUIRED":

374

print(f"OK: Field {field_name} changed from REQUIRED to {new_field.mode}")

375

376

if old_field.mode != "REQUIRED" and new_field.mode == "REQUIRED":

377

print(f"Error: Field {field_name} changed from {old_field.mode} to REQUIRED")

378

379

# Check for new fields

380

new_field_names = set(new_fields.keys()) - set(old_fields.keys())

381

for field_name in new_field_names:

382

field = new_fields[field_name]

383

if field.mode == "REQUIRED":

384

print(f"Error: New REQUIRED field added: {field_name}")

385

else:

386

print(f"OK: New optional field added: {field_name}")

387

```