Python client library for managing BigQuery Data Transfer Service operations and scheduling data transfers from partner SaaS applications.
—
Comprehensive scheduling options and configuration types for defining when and how data transfers execute, including time-based, manual, and event-driven scheduling patterns.
Basic scheduling options for data transfers.
class ScheduleOptions:
"""
Options customizing the data transfer schedule.
Attributes:
disable_auto_scheduling (bool): If set to true, disable automatic scheduling of data transfer runs.
start_time (Timestamp): Specifies time to start scheduling transfer runs.
end_time (Timestamp): Defines time to stop scheduling transfer runs.
"""
disable_auto_scheduling: bool
start_time: timestamp_pb2.Timestamp
end_time: timestamp_pb2.TimestampEnhanced scheduling options with more flexibility for data transfers.
class ScheduleOptionsV2:
"""
Options customizing the data transfer schedule (version 2).
Attributes:
time_based_schedule (TimeBasedSchedule): Time based transfer schedule options.
manual_schedule (ManualSchedule): Manual transfer schedule options.
event_driven_schedule (EventDrivenSchedule): Event driven transfer schedule options.
"""
time_based_schedule: TimeBasedSchedule
manual_schedule: ManualSchedule
event_driven_schedule: EventDrivenScheduleTime-based scheduling configuration for regular, scheduled data transfers.
class TimeBasedSchedule:
"""
Options customizing time based transfer schedule.
Attributes:
schedule (str): Data transfer schedule in unix-cron format.
start_time (Timestamp): Specifies time to start scheduling transfer runs.
end_time (Timestamp): Defines time to stop scheduling transfer runs.
"""
schedule: str
start_time: timestamp_pb2.Timestamp
end_time: timestamp_pb2.TimestampManual scheduling configuration for on-demand data transfers.
class ManualSchedule:
    """
    Options customizing manual transfer schedule.

    This message has no fields and is used as a marker for manual scheduling.
    """
    pass

Event-driven scheduling configuration for data transfers triggered by external events.
class EventDrivenSchedule:
    """
    Options customizing event driven transfer schedule.

    Attributes:
        pubsub_subscription (str): Pub/Sub subscription name used to receive events.
    """
    pubsub_subscription: str

Email notification preferences for transfer run events.
class EmailPreferences:
    """
    Represents preferences for sending email notifications for transfer run events.

    Attributes:
        enable_failure_email (bool): If true, email notifications will be sent
            on transfer run failures.
    """
    enable_failure_email: bool

Encryption configuration for data transfers.
class EncryptionConfiguration:
    """
    Represents the encryption configuration for data transfers.

    Attributes:
        kms_key_name (str): The name of the KMS key used for encryption.
    """

    kms_key_name: str


class TransferState(proto.Enum):
    """
    Represents data transfer run state.

    Values:
        TRANSFER_STATE_UNSPECIFIED (0): State placeholder.
        PENDING (2): Data transfer is scheduled and waiting to be picked up.
        RUNNING (3): Data transfer is in progress.
        SUCCEEDED (4): Data transfer completed successfully.
        FAILED (5): Data transfer failed.
        CANCELLED (6): Data transfer is cancelled.
    """

    TRANSFER_STATE_UNSPECIFIED = 0
    # Value 1 is not defined in this enum.
    PENDING = 2
    RUNNING = 3
    SUCCEEDED = 4
    FAILED = 5
    CANCELLED = 6


class TransferType(proto.Enum):
    """
    DEPRECATED. Represents data transfer type.

    Values:
        TRANSFER_TYPE_UNSPECIFIED (0): Invalid or unknown transfer type placeholder.
        BATCH (1): Batch data transfer.
        STREAMING (2): Streaming data transfer.
    """

    TRANSFER_TYPE_UNSPECIFIED = 0
    BATCH = 1
    STREAMING = 2

from google.cloud import bigquery_datatransfer
from google.protobuf import timestamp_pb2
import datetime

# Create a time-based schedule (timezone-aware bounds, UTC).
start_time = timestamp_pb2.Timestamp()
start_time.FromDatetime(datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc))
end_time = timestamp_pb2.Timestamp()
end_time.FromDatetime(datetime.datetime(2024, 12, 31, tzinfo=datetime.timezone.utc))

time_based_schedule = bigquery_datatransfer.TimeBasedSchedule(
    schedule="every day 08:00",
    start_time=start_time,
    end_time=end_time,
)
schedule_options_v2 = bigquery_datatransfer.ScheduleOptionsV2(
    time_based_schedule=time_based_schedule
)

# Use in transfer config
transfer_config = {
    "display_name": "Daily ETL Transfer",
    "data_source_id": "scheduled_query",
    "destination_dataset_id": "my_dataset",
    "schedule_options_v2": schedule_options_v2,
    # ... other config options
}

from google.cloud import bigquery_datatransfer
# Create manual schedule (no automatic runs)
manual_schedule = bigquery_datatransfer.ManualSchedule()
schedule_options_v2 = bigquery_datatransfer.ScheduleOptionsV2(
    manual_schedule=manual_schedule
)

# Use in transfer config for manual-only runs
transfer_config = {
    "display_name": "Manual ETL Transfer",
    "data_source_id": "scheduled_query",
    "destination_dataset_id": "my_dataset",
    "schedule_options_v2": schedule_options_v2,
    # ... other config options
}

from google.cloud import bigquery_datatransfer
# Create event-driven schedule
# NOTE: project_id is a placeholder the caller must define.
event_driven_schedule = bigquery_datatransfer.EventDrivenSchedule(
    pubsub_subscription=f"projects/{project_id}/subscriptions/data-transfer-events"
)
schedule_options_v2 = bigquery_datatransfer.ScheduleOptionsV2(
    event_driven_schedule=event_driven_schedule
)

# Use in transfer config for event-triggered runs
transfer_config = {
    "display_name": "Event-Driven ETL Transfer",
    "data_source_id": "scheduled_query",
    "destination_dataset_id": "my_dataset",
    "schedule_options_v2": schedule_options_v2,
    # ... other config options
}

from google.cloud import bigquery_datatransfer
from google.protobuf import timestamp_pb2
import datetime

# Create basic schedule options (timezone-aware bounds, UTC).
start_time = timestamp_pb2.Timestamp()
start_time.FromDatetime(datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc))
end_time = timestamp_pb2.Timestamp()
end_time.FromDatetime(datetime.datetime(2024, 12, 31, tzinfo=datetime.timezone.utc))

schedule_options = bigquery_datatransfer.ScheduleOptions(
    disable_auto_scheduling=False,
    start_time=start_time,
    end_time=end_time,
)

# Use with regular schedule string
transfer_config = {
    "display_name": "Scheduled ETL Transfer",
    "data_source_id": "scheduled_query",
    "destination_dataset_id": "my_dataset",
    "schedule": "every day 08:00",
    "schedule_options": schedule_options,
    # ... other config options
}

from google.cloud import bigquery_datatransfer
# Configure email notifications on transfer run failures.
email_preferences = bigquery_datatransfer.EmailPreferences(
    enable_failure_email=True
)

# Use in transfer config
transfer_config = {
    "display_name": "Monitored ETL Transfer",
    "data_source_id": "scheduled_query",
    "destination_dataset_id": "my_dataset",
    "schedule": "every day 08:00",
    "email_preferences": email_preferences,
    # ... other config options
}

from google.cloud import bigquery_datatransfer
# Configure encryption with customer-managed key.
# NOTE: project_id, location, key_ring, and key are placeholders the caller must define.
encryption_config = bigquery_datatransfer.EncryptionConfiguration(
    kms_key_name=f"projects/{project_id}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{key}"
)

# Use in transfer config
transfer_config = {
    "display_name": "Encrypted ETL Transfer",
    "data_source_id": "scheduled_query",
    "destination_dataset_id": "my_dataset",
    "schedule": "every day 08:00",
    "encryption_configuration": encryption_config,
    # ... other config options
}

from google.cloud import bigquery_datatransfer
from google.cloud.bigquery_datatransfer_v1 import TransferState

client = bigquery_datatransfer.DataTransferServiceClient()

# List only failed runs
# NOTE: project_id, location, and config_id are placeholders the caller must define.
parent = f"projects/{project_id}/locations/{location}/transferConfigs/{config_id}"
response = client.list_transfer_runs(
    parent=parent,
    states=[TransferState.FAILED],
)

print("Failed transfer runs:")
for run in response:
    print(f" Run ID: {run.name}")
    print(f" State: {run.state}")
    print(f" Error: {run.error_status.message if run.error_status else 'No error details'}")


# Check run state
def get_state_description(state):
    """Return a human-readable description for a TransferState value."""
    descriptions = {
        TransferState.PENDING: "Transfer is scheduled and waiting",
        TransferState.RUNNING: "Transfer is currently running",
        TransferState.SUCCEEDED: "Transfer completed successfully",
        TransferState.FAILED: "Transfer failed",
        TransferState.CANCELLED: "Transfer was cancelled",
    }
    # Unrecognized values (e.g. TRANSFER_STATE_UNSPECIFIED) fall through here.
    return descriptions.get(state, "Unknown state")


# Example usage
# NOTE: run_name is a placeholder the caller must define.
run = client.get_transfer_run(name=run_name)
print(f"Transfer state: {get_state_description(run.state)}")

Common schedule formats for time-based scheduling:
# Accepted values for the `schedule` string on a transfer config, shown as
# bare string literals for reference.
# Daily schedules
"every day 08:00" # Every day at 8 AM
"every 2 days 14:30" # Every 2 days at 2:30 PM
# Weekly schedules
"every monday 09:00" # Every Monday at 9 AM
"every sunday 23:00" # Every Sunday at 11 PM
# Monthly schedules
"1st,15th of month 10:00" # 1st and 15th of each month at 10 AM
"last day of month 18:00" # Last day of each month at 6 PM
# Hourly schedules
"every 4 hours" # Every 4 hours
"every hour" # Every hour
# Custom cron expressions
"0 */6 * * *" # Every 6 hours (cron format)
"0 0 * * 1" # Every Monday at midnight (cron format)

Install with Tessl CLI
npx tessl i tessl/pypi-google-cloud-bigquery-datatransfer