Google BigQuery API client library for Python providing comprehensive data warehouse and analytics capabilities
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
BigQuery ML model management and user-defined functions (UDFs). Supports model creation, training, evaluation, prediction, and stored procedures. This capability enables machine learning workflows and custom function definitions within BigQuery.
from google.cloud.bigquery import Model, ModelReference, Routine, RoutineReference, RoutineArgument
from google.cloud.bigquery.routine import RoutineType, DeterminismLevel, RemoteFunctionOptions

BigQuery ML models for machine learning workflows including training, evaluation, and prediction operations.
class Model:
    """Stub for a BigQuery ML model resource (``google.cloud.bigquery.Model``).

    Constructed from either a ``"project.dataset.model"`` string or a
    :class:`ModelReference`.  All properties are read-only views of
    server-side model metadata.
    """

    def __init__(self, model_ref: Union[str, ModelReference]): ...

    # --- Identity / addressing ---
    @property
    def reference(self) -> ModelReference: ...
    @property
    def model_id(self) -> str: ...
    @property
    def dataset_id(self) -> str: ...
    @property
    def project(self) -> str: ...
    @property
    def path(self) -> str: ...

    # --- Server-assigned lifecycle timestamps ---
    @property
    def created(self) -> datetime.datetime: ...
    @property
    def modified(self) -> datetime.datetime: ...
    @property
    def expires(self) -> datetime.datetime: ...

    # --- Descriptive metadata ---
    @property
    def friendly_name(self) -> str: ...
    @property
    def description(self) -> str: ...

    # --- ML metadata: model kind (e.g. 'linear_reg' in the example below),
    # training history, and input/output schema ---
    @property
    def model_type(self) -> str: ...
    @property
    def training_runs(self) -> List[Dict[str, Any]]: ...
    @property
    def feature_columns(self) -> List[StandardSqlField]: ...
    @property
    def label_columns(self) -> List[StandardSqlField]: ...

    @property
    def location(self) -> str: ...
    @property
    def encryption_configuration(self) -> EncryptionConfiguration: ...
class ModelReference:
    """Fully-qualified pointer (project, dataset, model) to a BigQuery ML model."""

    def __init__(self, project: str, dataset_id: str, model_id: str): ...
    @property
    def project(self) -> str: ...
    @property
    def dataset_id(self) -> str: ...
    @property
    def model_id(self) -> str: ...
    # URL path of the model resource — TODO confirm exact format against the API.
    @property
    def path(self) -> str: ...

User-defined functions (UDFs) and stored procedures for extending BigQuery SQL capabilities with custom logic.
class Routine:
    """Stub for a BigQuery routine: a UDF, table-valued function, or stored procedure.

    Constructed from either a ``"project.dataset.routine"`` string or a
    :class:`RoutineReference`, plus an optional ``routine_type`` (see the
    :class:`RoutineType` constants).
    """

    def __init__(self, routine_ref: Union[str, RoutineReference], routine_type: str = None): ...

    # --- Identity / addressing ---
    @property
    def reference(self) -> RoutineReference: ...
    @property
    def routine_id(self) -> str: ...
    @property
    def dataset_id(self) -> str: ...
    @property
    def project(self) -> str: ...
    @property
    def path(self) -> str: ...

    # --- Server-assigned timestamps ---
    @property
    def created(self) -> datetime.datetime: ...
    @property
    def modified(self) -> datetime.datetime: ...

    # --- Definition: kind, implementation language ("SQL" or "JAVASCRIPT"
    # in the examples below), signature, and body text ---
    @property
    def type_(self) -> str: ...  # trailing underscore avoids shadowing builtin `type`
    @property
    def language(self) -> str: ...
    @property
    def arguments(self) -> List[RoutineArgument]: ...
    @property
    def return_type(self) -> StandardSqlDataType: ...
    @property
    def return_table_type(self) -> StandardSqlTableType: ...  # presumably for TABLE_VALUED_FUNCTION — confirm
    @property
    def body(self) -> str: ...

    @property
    def description(self) -> str: ...
    @property
    def determinism_level(self) -> str: ...  # see DeterminismLevel constants
    @property
    def imported_libraries(self) -> List[str]: ...
    # Present when the routine delegates execution to an external endpoint.
    @property
    def remote_function_options(self) -> RemoteFunctionOptions: ...
class RoutineReference:
    """Fully-qualified pointer (project, dataset, routine) to a BigQuery routine."""

    def __init__(self, project: str, dataset_id: str, routine_id: str): ...
    @property
    def project(self) -> str: ...
    @property
    def dataset_id(self) -> str: ...
    @property
    def routine_id(self) -> str: ...
    # URL path of the routine resource — TODO confirm exact format against the API.
    @property
    def path(self) -> str: ...
class RoutineArgument:
    """One parameter in a routine's signature: a name paired with a SQL data type.

    All fields are optional at construction; the examples below set only
    ``name`` and ``data_type``.
    """

    def __init__(self, name: str = None, argument_kind: str = None, mode: str = None, data_type: StandardSqlDataType = None): ...
    @property
    def name(self) -> str: ...
    @property
    def argument_kind(self) -> str: ...  # semantics defined by the BigQuery API — not visible here
    @property
    def mode(self) -> str: ...  # semantics defined by the BigQuery API — not visible here
    @property
    def data_type(self) -> StandardSqlDataType: ...

Constants and enums for routine and model configuration.
class RoutineType:
    """String constants identifying the kind of a routine (see ``Routine.type_``)."""

    ROUTINE_TYPE_UNSPECIFIED: str
    SCALAR_FUNCTION: str
    PROCEDURE: str
    TABLE_VALUED_FUNCTION: str
class DeterminismLevel:
    """String constants describing whether a routine is deterministic
    (see ``Routine.determinism_level``)."""

    DETERMINISM_LEVEL_UNSPECIFIED: str
    DETERMINISTIC: str
    NOT_DETERMINISTIC: str
class RemoteFunctionOptions:
    """Configuration for a remote function: the external endpoint, the
    BigQuery connection used to reach it, optional key/value context,
    and a cap on request batch size."""

    def __init__(self, endpoint: str = None, connection: str = None, user_defined_context: Dict[str, str] = None, max_batching_rows: int = None): ...
    @property
    def endpoint(self) -> str: ...  # HTTPS URL of the remote service (see example below)
    @property
    def connection(self) -> str: ...  # "projects/.../locations/.../connections/..." resource name
    @property
    def user_defined_context(self) -> Dict[str, str]: ...
    @property
    def max_batching_rows(self) -> int: ...  # presumably max rows per request — confirm against API docs
from google.cloud import bigquery
client = bigquery.Client()

# Address the model by its fully-qualified (project, dataset, model) triple.
ref = bigquery.ModelReference(
    project="my-project",
    dataset_id="my_dataset",
    model_id="my_model",
)

# BigQuery ML models are created with DDL: CREATE MODEL runs as an
# ordinary query job that trains the model server-side.
ddl = """
CREATE OR REPLACE MODEL `my-project.my_dataset.my_model`
OPTIONS(
model_type='linear_reg',
input_label_cols=['label']
) AS
SELECT feature1, feature2, label
FROM `my-project.my_dataset.training_data`
"""
client.query(ddl).result()  # block until training completes

# Fetch server-side metadata for the freshly trained model.
model = client.get_model(ref)
print(f"Model created: {model.created}")
print(f"Model type: {model.model_type}")
print(f"Training runs: {len(model.training_runs)}")

# Enumerate every model in the dataset.
for model in client.list_models("my-project.my_dataset"):
    print(f"Model: {model.model_id}")
from google.cloud import bigquery
client = bigquery.Client()

# Define a SQL scalar UDF: Euclidean distance between two 2-D points.
dist_fn = bigquery.Routine(
    routine_ref="my-project.my_dataset.calculate_distance",
    routine_type=bigquery.RoutineType.SCALAR_FUNCTION,
)
dist_fn.language = "SQL"
dist_fn.body = """
SQRT(POW(x2 - x1, 2) + POW(y2 - y1, 2))
"""
# All four inputs share the FLOAT64 type, so build the signature in one pass.
dist_fn.arguments = [
    bigquery.RoutineArgument(
        name=arg_name,
        data_type=bigquery.StandardSqlDataType(type_kind="FLOAT64"),
    )
    for arg_name in ("x1", "y1", "x2", "y2")
]
dist_fn.return_type = bigquery.StandardSqlDataType(type_kind="FLOAT64")

# Persist the routine server-side.
routine = client.create_routine(dist_fn)
print(f"Created routine: {routine.routine_id}")

# The UDF is then callable from SQL like any built-in function.
query = """
SELECT
point_a,
point_b,
`my-project.my_dataset.calculate_distance`(ax, ay, bx, by) as distance
FROM `my-project.my_dataset.points_table`
"""
results = client.query(query).result()
# Create a JavaScript UDF for more complex logic
# A JAVASCRIPT routine embeds raw JS source that runs once per input row.
ua_parser = bigquery.Routine(
    routine_ref="my-project.my_dataset.parse_user_agent",
    routine_type=bigquery.RoutineType.SCALAR_FUNCTION,
)
ua_parser.language = "JAVASCRIPT"
ua_parser.body = """
var parts = user_agent.split(' ');
var browser = 'Unknown';
if (user_agent.indexOf('Chrome') !== -1) {
browser = 'Chrome';
} else if (user_agent.indexOf('Firefox') !== -1) {
browser = 'Firefox';
} else if (user_agent.indexOf('Safari') !== -1) {
browser = 'Safari';
}
return browser;
"""
# Single STRING input, STRING output.
ua_parser.arguments = [
    bigquery.RoutineArgument(
        name="user_agent",
        data_type=bigquery.StandardSqlDataType(type_kind="STRING"),
    )
]
ua_parser.return_type = bigquery.StandardSqlDataType(type_kind="STRING")

# Persist the JavaScript UDF server-side.
routine = client.create_routine(ua_parser)
# Create a remote function that calls an external API
remote_routine = bigquery.Routine(
routine_ref="my-project.my_dataset.sentiment_analysis",
routine_type=bigquery.RoutineType.SCALAR_FUNCTION
)
remote_routine.remote_function_options = bigquery.RemoteFunctionOptions(
endpoint="https://my-cloud-function-url",
connection="projects/my-project/locations/us/connections/my-connection",
max_batching_rows=1000
)
remote_routine.arguments = [
bigquery.RoutineArgument(
name="text_input",
data_type=bigquery.StandardSqlDataType(type_kind="STRING")
)
]
remote_routine.return_type = bigquery.StandardSqlDataType(type_kind="FLOAT64")
# Create the remote function
routine = client.create_routine(remote_routine)Models and routines are managed through the client with create, get, update, list, and delete operations similar to tables and datasets. They provide powerful extensibility for BigQuery's analytical and machine learning capabilities.
Install with Tessl CLI
npx tessl i tessl/pypi-google-cloud-bigquery