0
# Models and Routines
1
2
BigQuery ML model management and user-defined functions (UDFs). Supports model creation, training, evaluation, prediction, and stored procedures. This capability enables machine learning workflows and custom function definitions within BigQuery.
3
4
## Core Imports
5
6
```python
7
from google.cloud.bigquery import Model, ModelReference, Routine, RoutineReference, RoutineArgument
8
from google.cloud.bigquery.routine import RoutineType, DeterminismLevel, RemoteFunctionOptions
9
```
10
11
## Capabilities
12
13
### Model Management
14
15
BigQuery ML models for machine learning workflows including training, evaluation, and prediction operations.
16
17
```python { .api }
18
class Model:
19
def __init__(self, model_ref: Union[str, ModelReference]): ...
20
21
@property
22
def reference(self) -> ModelReference: ...
23
@property
24
def model_id(self) -> str: ...
25
@property
26
def dataset_id(self) -> str: ...
27
@property
28
def project(self) -> str: ...
29
@property
30
def path(self) -> str: ...
31
@property
32
def created(self) -> datetime.datetime: ...
33
@property
34
def modified(self) -> datetime.datetime: ...
35
@property
36
def expires(self) -> datetime.datetime: ...
37
@property
38
def friendly_name(self) -> str: ...
39
@property
40
def description(self) -> str: ...
41
@property
42
def model_type(self) -> str: ...
43
@property
44
def training_runs(self) -> List[Dict[str, Any]]: ...
45
@property
46
def feature_columns(self) -> List[StandardSqlField]: ...
47
@property
48
def label_columns(self) -> List[StandardSqlField]: ...
49
@property
50
def location(self) -> str: ...
51
@property
52
def encryption_configuration(self) -> EncryptionConfiguration: ...
53
54
class ModelReference:
55
def __init__(self, project: str, dataset_id: str, model_id: str): ...
56
57
@property
58
def project(self) -> str: ...
59
@property
60
def dataset_id(self) -> str: ...
61
@property
62
def model_id(self) -> str: ...
63
@property
64
def path(self) -> str: ...
65
```
66
67
### Routine Management
68
69
User-defined functions (UDFs) and stored procedures for extending BigQuery SQL capabilities with custom logic.
70
71
```python { .api }
72
class Routine:
73
def __init__(self, routine_ref: Union[str, RoutineReference], routine_type: str = None): ...
74
75
@property
76
def reference(self) -> RoutineReference: ...
77
@property
78
def routine_id(self) -> str: ...
79
@property
80
def dataset_id(self) -> str: ...
81
@property
82
def project(self) -> str: ...
83
@property
84
def path(self) -> str: ...
85
@property
86
def created(self) -> datetime.datetime: ...
87
@property
88
def modified(self) -> datetime.datetime: ...
89
@property
90
def type_(self) -> str: ...
91
@property
92
def language(self) -> str: ...
93
@property
94
def arguments(self) -> List[RoutineArgument]: ...
95
@property
96
def return_type(self) -> StandardSqlDataType: ...
97
@property
98
def return_table_type(self) -> StandardSqlTableType: ...
99
@property
100
def body(self) -> str: ...
101
@property
102
def description(self) -> str: ...
103
@property
104
def determinism_level(self) -> str: ...
105
@property
106
def imported_libraries(self) -> List[str]: ...
107
@property
108
def remote_function_options(self) -> RemoteFunctionOptions: ...
109
110
class RoutineReference:
111
def __init__(self, project: str, dataset_id: str, routine_id: str): ...
112
113
@property
114
def project(self) -> str: ...
115
@property
116
def dataset_id(self) -> str: ...
117
@property
118
def routine_id(self) -> str: ...
119
@property
120
def path(self) -> str: ...
121
122
class RoutineArgument:
123
def __init__(self, name: str = None, argument_kind: str = None, mode: str = None, data_type: StandardSqlDataType = None): ...
124
125
@property
126
def name(self) -> str: ...
127
@property
128
def argument_kind(self) -> str: ...
129
@property
130
def mode(self) -> str: ...
131
@property
132
def data_type(self) -> StandardSqlDataType: ...
133
```
134
135
### Routine and Model Types
136
137
Constants and enums for routine and model configuration.
138
139
```python { .api }
140
class RoutineType:
141
ROUTINE_TYPE_UNSPECIFIED: str
142
SCALAR_FUNCTION: str
143
PROCEDURE: str
144
TABLE_VALUED_FUNCTION: str
145
146
class DeterminismLevel:
147
DETERMINISM_LEVEL_UNSPECIFIED: str
148
DETERMINISTIC: str
149
NOT_DETERMINISTIC: str
150
151
class RemoteFunctionOptions:
152
def __init__(self, endpoint: str = None, connection: str = None, user_defined_context: Dict[str, str] = None, max_batching_rows: int = None): ...
153
154
@property
155
def endpoint(self) -> str: ...
156
@property
157
def connection(self) -> str: ...
158
@property
159
def user_defined_context(self) -> Dict[str, str]: ...
160
@property
161
def max_batching_rows(self) -> int: ...
162
```
163
164
## Usage Examples
165
166
### Creating and Managing Models
167
168
```python
169
from google.cloud import bigquery
170
171
client = bigquery.Client()
172
173
# Create a model reference
174
model_ref = bigquery.ModelReference(
175
project="my-project",
176
dataset_id="my_dataset",
177
model_id="my_model"
178
)
179
180
# Create model using ML.CREATE_MODEL SQL
181
create_model_query = """
182
CREATE OR REPLACE MODEL `my-project.my_dataset.my_model`
183
OPTIONS(
184
model_type='linear_reg',
185
input_label_cols=['label']
186
) AS
187
SELECT feature1, feature2, label
188
FROM `my-project.my_dataset.training_data`
189
"""
190
191
query_job = client.query(create_model_query)
192
query_job.result()
193
194
# Get model information
195
model = client.get_model(model_ref)
196
print(f"Model created: {model.created}")
197
print(f"Model type: {model.model_type}")
198
print(f"Training runs: {len(model.training_runs)}")
199
200
# List models in dataset
201
models = client.list_models("my-project.my_dataset")
202
for model in models:
203
print(f"Model: {model.model_id}")
204
```
205
206
### Creating User-Defined Functions
207
208
```python
209
from google.cloud import bigquery
210
211
client = bigquery.Client()
212
213
# Create a scalar UDF
214
routine = bigquery.Routine(
215
routine_ref="my-project.my_dataset.calculate_distance",
216
routine_type=bigquery.RoutineType.SCALAR_FUNCTION
217
)
218
219
routine.language = "SQL"
220
routine.body = """
221
SQRT(POW(x2 - x1, 2) + POW(y2 - y1, 2))
222
"""
223
224
routine.arguments = [
225
bigquery.RoutineArgument(
226
name="x1",
227
data_type=bigquery.StandardSqlDataType(type_kind="FLOAT64")
228
),
229
bigquery.RoutineArgument(
230
name="y1",
231
data_type=bigquery.StandardSqlDataType(type_kind="FLOAT64")
232
),
233
bigquery.RoutineArgument(
234
name="x2",
235
data_type=bigquery.StandardSqlDataType(type_kind="FLOAT64")
236
),
237
bigquery.RoutineArgument(
238
name="y2",
239
data_type=bigquery.StandardSqlDataType(type_kind="FLOAT64")
240
)
241
]
242
243
routine.return_type = bigquery.StandardSqlDataType(type_kind="FLOAT64")
244
245
# Create the routine
246
routine = client.create_routine(routine)
247
print(f"Created routine: {routine.routine_id}")
248
249
# Use the UDF in a query
250
query = """
251
SELECT
252
point_a,
253
point_b,
254
`my-project.my_dataset.calculate_distance`(ax, ay, bx, by) as distance
255
FROM `my-project.my_dataset.points_table`
256
"""
257
258
query_job = client.query(query)
259
results = query_job.result()
260
```
261
262
### JavaScript UDF Example
263
264
```python
265
# Create a JavaScript UDF for more complex logic
266
js_routine = bigquery.Routine(
267
routine_ref="my-project.my_dataset.parse_user_agent",
268
routine_type=bigquery.RoutineType.SCALAR_FUNCTION
269
)
270
271
js_routine.language = "JAVASCRIPT"
272
js_routine.body = """
273
var parts = user_agent.split(' ');
274
var browser = 'Unknown';
275
276
if (user_agent.indexOf('Chrome') !== -1) {
277
browser = 'Chrome';
278
} else if (user_agent.indexOf('Firefox') !== -1) {
279
browser = 'Firefox';
280
} else if (user_agent.indexOf('Safari') !== -1) {
281
browser = 'Safari';
282
}
283
284
return browser;
285
"""
286
287
js_routine.arguments = [
288
bigquery.RoutineArgument(
289
name="user_agent",
290
data_type=bigquery.StandardSqlDataType(type_kind="STRING")
291
)
292
]
293
294
js_routine.return_type = bigquery.StandardSqlDataType(type_kind="STRING")
295
296
# Create and use the JavaScript UDF
297
routine = client.create_routine(js_routine)
298
```
299
300
### Remote Functions
301
302
```python
303
# Create a remote function that calls an external API
304
remote_routine = bigquery.Routine(
305
routine_ref="my-project.my_dataset.sentiment_analysis",
306
routine_type=bigquery.RoutineType.SCALAR_FUNCTION
307
)
308
309
remote_routine.remote_function_options = bigquery.RemoteFunctionOptions(
310
endpoint="https://my-cloud-function-url",
311
connection="projects/my-project/locations/us/connections/my-connection",
312
max_batching_rows=1000
313
)
314
315
remote_routine.arguments = [
316
bigquery.RoutineArgument(
317
name="text_input",
318
data_type=bigquery.StandardSqlDataType(type_kind="STRING")
319
)
320
]
321
322
remote_routine.return_type = bigquery.StandardSqlDataType(type_kind="FLOAT64")
323
324
# Create the remote function
325
routine = client.create_routine(remote_routine)
326
```
327
328
Models and routines are managed through the client with create, get, update, list, and delete operations similar to tables and datasets. They provide powerful extensibility for BigQuery's analytical and machine learning capabilities.