CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-google-cloud-bigquery

Google BigQuery API client library for Python providing comprehensive data warehouse and analytics capabilities

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

models-routines.mddocs/

Models and Routines

BigQuery ML model management and user-defined functions (UDFs). Supports model creation, training, evaluation, prediction, and stored procedures. This capability enables machine learning workflows and custom function definitions within BigQuery.

Core Imports

from google.cloud.bigquery import Model, ModelReference, Routine, RoutineReference, RoutineArgument
from google.cloud.bigquery.routine import RoutineType, DeterminismLevel, RemoteFunctionOptions

Capabilities

Model Management

BigQuery ML models for machine learning workflows including training, evaluation, and prediction operations.

class Model:
    """A BigQuery ML model, as used in training, evaluation, and prediction workflows."""

    def __init__(self, model_ref: Union[str, ModelReference]):
        """Build a model from a ``ModelReference`` or from a string identifying the model."""

    @property
    def reference(self) -> ModelReference:
        """Reference that identifies this model."""

    @property
    def model_id(self) -> str:
        """ID of the model."""

    @property
    def dataset_id(self) -> str:
        """ID of the dataset that contains the model."""

    @property
    def project(self) -> str:
        """ID of the project that contains the model."""

    @property
    def path(self) -> str:
        """Path of the model (presumably its API resource path -- confirm)."""

    @property
    def created(self) -> datetime.datetime:
        """Time at which the model was created."""

    @property
    def modified(self) -> datetime.datetime:
        """Time at which the model was last modified."""

    @property
    def expires(self) -> datetime.datetime:
        """Time at which the model expires."""

    @property
    def friendly_name(self) -> str:
        """Friendly name of the model."""

    @property
    def description(self) -> str:
        """Description of the model."""

    @property
    def model_type(self) -> str:
        """Type of the model."""

    @property
    def training_runs(self) -> List[Dict[str, Any]]:
        """Training runs of the model, one dictionary per run."""

    @property
    def feature_columns(self) -> List[StandardSqlField]:
        """Feature columns of the model."""

    @property
    def label_columns(self) -> List[StandardSqlField]:
        """Label columns of the model."""

    @property
    def location(self) -> str:
        """Location of the model."""

    @property
    def encryption_configuration(self) -> EncryptionConfiguration:
        """Encryption configuration of the model."""

class ModelReference:
    """Pointer to a BigQuery ML model, identified by project, dataset, and model ID.

    The constructor takes no arguments. Build a reference with
    ``ModelReference.from_string("project.dataset.model")`` or with
    ``ModelReference.from_api_repr(resource)``.
    """

    def __init__(self): ...

    @classmethod
    def from_string(cls, model_id: str, default_project: Optional[str] = None) -> "ModelReference":
        """Build a reference from a ``"project.dataset.model"`` ID string.

        ``default_project`` supplies the project when ``model_id`` omits it.
        """

    @classmethod
    def from_api_repr(cls, resource: Dict[str, Any]) -> "ModelReference":
        """Build a reference from its API resource representation."""

    @property
    def project(self) -> str:
        """ID of the project that contains the model."""

    @property
    def dataset_id(self) -> str:
        """ID of the dataset that contains the model."""

    @property
    def model_id(self) -> str:
        """ID of the model."""

    @property
    def path(self) -> str:
        """Path of the model (presumably its API resource path -- confirm)."""

Routine Management

User-defined functions (UDFs) and stored procedures for extending BigQuery SQL capabilities with custom logic.

class Routine:
    """A user-defined function (UDF) or stored procedure in BigQuery.

    The constructor accepts the routine reference followed by keyword
    arguments named after the routine's properties, for example
    ``Routine(routine_ref, type_="SCALAR_FUNCTION", language="SQL", body="x * 3")``.
    The routine type is passed as ``type_``; there is no ``routine_type``
    parameter.
    """

    def __init__(self, routine_ref: Union[str, RoutineReference], **kwargs): ...

    @property
    def reference(self) -> RoutineReference:
        """Reference that identifies this routine."""

    @property
    def routine_id(self) -> str:
        """ID of the routine."""

    @property
    def dataset_id(self) -> str:
        """ID of the dataset that contains the routine."""

    @property
    def project(self) -> str:
        """ID of the project that contains the routine."""

    @property
    def path(self) -> str:
        """Path of the routine (presumably its API resource path -- confirm)."""

    @property
    def created(self) -> datetime.datetime:
        """Time at which the routine was created."""

    @property
    def modified(self) -> datetime.datetime:
        """Time at which the routine was last modified."""

    @property
    def type_(self) -> str:
        """Type of the routine; see ``RoutineType`` for the accepted values."""

    @property
    def language(self) -> str:
        """Language of the routine body, for example ``"SQL"`` or ``"JAVASCRIPT"``."""

    @property
    def arguments(self) -> List[RoutineArgument]:
        """Input arguments of the routine."""

    @property
    def return_type(self) -> StandardSqlDataType:
        """Return type of the routine."""

    @property
    def return_table_type(self) -> StandardSqlTableType:
        """Return table type of the routine."""

    @property
    def body(self) -> str:
        """Body of the routine."""

    @property
    def description(self) -> str:
        """Description of the routine."""

    @property
    def determinism_level(self) -> str:
        """Determinism level of the routine; see ``DeterminismLevel``."""

    @property
    def imported_libraries(self) -> List[str]:
        """Libraries imported by the routine."""

    @property
    def remote_function_options(self) -> RemoteFunctionOptions:
        """Remote function options of the routine."""

class RoutineReference:
    """Pointer to a routine, identified by project, dataset, and routine ID.

    The constructor takes no arguments. Build a reference with
    ``RoutineReference.from_string("project.dataset.routine")`` or with
    ``RoutineReference.from_api_repr(resource)``.
    """

    def __init__(self): ...

    @classmethod
    def from_string(cls, routine_id: str, default_project: Optional[str] = None) -> "RoutineReference":
        """Build a reference from a ``"project.dataset.routine"`` ID string.

        ``default_project`` supplies the project when ``routine_id`` omits it.
        """

    @classmethod
    def from_api_repr(cls, resource: Dict[str, Any]) -> "RoutineReference":
        """Build a reference from its API resource representation."""

    @property
    def project(self) -> str:
        """ID of the project that contains the routine."""

    @property
    def dataset_id(self) -> str:
        """ID of the dataset that contains the routine."""

    @property
    def routine_id(self) -> str:
        """ID of the routine."""

    @property
    def path(self) -> str:
        """Path of the routine (presumably its API resource path -- confirm)."""

class RoutineArgument:
    """Input or output argument of a routine.

    The constructor accepts keyword arguments named after the properties
    below, for example ``RoutineArgument(name="x", data_type=...)``. The
    argument kind is exposed as ``kind``; there is no ``argument_kind``
    property.
    """

    def __init__(self, **kwargs): ...

    @property
    def name(self) -> str:
        """Name of the argument."""

    @property
    def kind(self) -> str:
        """Kind of the argument, for example ``"FIXED_TYPE"`` or ``"ANY_TYPE"``."""

    @property
    def mode(self) -> str:
        """Mode of the argument (presumably input/output direction -- confirm)."""

    @property
    def data_type(self) -> StandardSqlDataType:
        """Data type of the argument."""

Routine and Model Types

Constants and option classes for routine configuration: the routine type, the determinism level, and remote function options.

class RoutineType:
    """String constants naming the type of a routine.

    Used when creating a ``Routine``, for example
    ``bigquery.RoutineType.SCALAR_FUNCTION``.
    """
    ROUTINE_TYPE_UNSPECIFIED: str
    SCALAR_FUNCTION: str
    PROCEDURE: str
    TABLE_VALUED_FUNCTION: str

class DeterminismLevel:
    """String constants for the determinism level of a routine (see ``Routine.determinism_level``)."""
    DETERMINISM_LEVEL_UNSPECIFIED: str
    DETERMINISTIC: str
    NOT_DETERMINISTIC: str

class RemoteFunctionOptions:
    """Options that configure a remote function routine."""

    def __init__(self, endpoint: str = None, connection: str = None, user_defined_context: Dict[str, str] = None, max_batching_rows: int = None):
        """Store the remote function settings; every argument is optional."""

    @property
    def endpoint(self) -> str:
        """Endpoint of the remote function, for example an HTTPS URL."""

    @property
    def connection(self) -> str:
        """Connection used by the remote function, given as a
        ``projects/.../locations/.../connections/...`` path."""

    @property
    def user_defined_context(self) -> Dict[str, str]:
        """User-defined context, as a string-to-string mapping."""

    @property
    def max_batching_rows(self) -> int:
        """Maximum number of rows per batch."""

Usage Examples

Creating and Managing Models

from google.cloud import bigquery

client = bigquery.Client()

# Create a model reference. ModelReference's constructor takes no arguments,
# so build it from the fully-qualified "project.dataset.model" ID instead.
model_ref = bigquery.ModelReference.from_string("my-project.my_dataset.my_model")

# Create the model with a BigQuery ML CREATE MODEL statement
create_model_query = """
CREATE OR REPLACE MODEL `my-project.my_dataset.my_model`
OPTIONS(
  model_type='linear_reg',
  input_label_cols=['label']
) AS
SELECT feature1, feature2, label
FROM `my-project.my_dataset.training_data`
"""

query_job = client.query(create_model_query)
query_job.result()  # Block until the statement (and so the training) finishes

# Get model information
model = client.get_model(model_ref)
print(f"Model created: {model.created}")
print(f"Model type: {model.model_type}")
print(f"Training runs: {len(model.training_runs)}")

# List models in dataset (a separate loop name keeps `model` above intact)
models = client.list_models("my-project.my_dataset")
for listed_model in models:
    print(f"Model: {listed_model.model_id}")

Creating User-Defined Functions

from google.cloud import bigquery

client = bigquery.Client()

# Create a scalar UDF. Routine takes property names as keyword arguments, so
# the routine type is passed as `type_` (a `routine_type` keyword would not
# set the routine's type).
routine = bigquery.Routine(
    "my-project.my_dataset.calculate_distance",
    type_=bigquery.RoutineType.SCALAR_FUNCTION,
)

routine.language = "SQL"
routine.body = """
    SQRT(POW(x2 - x1, 2) + POW(y2 - y1, 2))
"""

# All four coordinates and the result share the same FLOAT64 type.
float64 = bigquery.StandardSqlDataType(
    type_kind=bigquery.StandardSqlTypeNames.FLOAT64
)

routine.arguments = [
    bigquery.RoutineArgument(name="x1", data_type=float64),
    bigquery.RoutineArgument(name="y1", data_type=float64),
    bigquery.RoutineArgument(name="x2", data_type=float64),
    bigquery.RoutineArgument(name="y2", data_type=float64),
]

routine.return_type = float64

# Create the routine
routine = client.create_routine(routine)
print(f"Created routine: {routine.routine_id}")

# Use the UDF in a query
query = """
SELECT 
    point_a,
    point_b,
    `my-project.my_dataset.calculate_distance`(ax, ay, bx, by) as distance
FROM `my-project.my_dataset.points_table`
"""

query_job = client.query(query)
results = query_job.result()

JavaScript UDF Example

from google.cloud import bigquery

client = bigquery.Client()

# Create a JavaScript UDF for more complex logic. The routine type is passed
# as `type_`, the name of the corresponding Routine property.
js_routine = bigquery.Routine(
    "my-project.my_dataset.parse_user_agent",
    type_=bigquery.RoutineType.SCALAR_FUNCTION,
)

js_routine.language = "JAVASCRIPT"
# 'Chrome' is tested before 'Safari' on purpose: Chrome user agents also
# contain the word 'Safari'.
js_routine.body = """
    var parts = user_agent.split(' ');
    var browser = 'Unknown';
    
    if (user_agent.indexOf('Chrome') !== -1) {
        browser = 'Chrome';
    } else if (user_agent.indexOf('Firefox') !== -1) {
        browser = 'Firefox';
    } else if (user_agent.indexOf('Safari') !== -1) {
        browser = 'Safari';
    }
    
    return browser;
"""

string_type = bigquery.StandardSqlDataType(
    type_kind=bigquery.StandardSqlTypeNames.STRING
)

js_routine.arguments = [
    bigquery.RoutineArgument(name="user_agent", data_type=string_type)
]

js_routine.return_type = string_type

# Create and use the JavaScript UDF
routine = client.create_routine(js_routine)

Remote Functions

from google.cloud import bigquery

client = bigquery.Client()

# Create a remote function that calls an external API. The routine type is
# passed as `type_`, the name of the corresponding Routine property.
remote_routine = bigquery.Routine(
    "my-project.my_dataset.sentiment_analysis",
    type_=bigquery.RoutineType.SCALAR_FUNCTION,
)

remote_routine.remote_function_options = bigquery.RemoteFunctionOptions(
    endpoint="https://my-cloud-function-url",
    connection="projects/my-project/locations/us/connections/my-connection",
    max_batching_rows=1000,
)

remote_routine.arguments = [
    bigquery.RoutineArgument(
        name="text_input",
        data_type=bigquery.StandardSqlDataType(
            type_kind=bigquery.StandardSqlTypeNames.STRING
        ),
    )
]

remote_routine.return_type = bigquery.StandardSqlDataType(
    type_kind=bigquery.StandardSqlTypeNames.FLOAT64
)

# Create the remote function
routine = client.create_routine(remote_routine)

Models and routines are managed through the client with create, get, update, list, and delete operations similar to tables and datasets. They provide powerful extensibility for BigQuery's analytical and machine learning capabilities.

Install with Tessl CLI

npx tessl i tessl/pypi-google-cloud-bigquery

docs

client-operations.md

data-loading.md

database-api.md

dataset-management.md

index.md

models-routines.md

query-operations.md

query-parameters.md

schema-definition.md

table-operations.md

tile.json