Provider package for integrating Google services with Apache Airflow, including Google Ads, Google Cloud (GCP), Google Firebase, Google LevelDB, Google Marketing Platform, and Google Workspace.
Comprehensive integration with Google Cloud Platform, providing CRUD operations, batch processing, real-time streaming, and machine learning capabilities across more than 40 Google Cloud services.
BigQuery: data warehouse and analytics platform integration with support for datasets, tables, jobs, and complex queries.
class BigQueryHook(GoogleBaseHook):
def __init__(
self,
gcp_conn_id: str = "google_cloud_default",
use_legacy_sql: bool = True,
location: Optional[str] = None,
priority: str = "INTERACTIVE",
job_id: Optional[str] = None,
job_retry: Optional[Retry] = None,
job_timeout: Optional[float] = None,
**kwargs
): ...
def get_service(self): ...
def create_empty_table(
self,
project_id: str,
dataset_id: str,
table_id: str,
schema_fields: Optional[List[Dict]] = None,
time_partitioning: Optional[Dict] = None,
cluster_fields: Optional[List[str]] = None,
labels: Optional[Dict] = None,
view: Optional[Dict] = None,
materialized_view: Optional[Dict] = None,
encryption_configuration: Optional[Dict] = None,
retry: Optional[Retry] = None,
timeout: Optional[float] = None,
exists_ok: bool = False
): ...
def create_empty_dataset(
self,
dataset_id: str,
project_id: str,
location: Optional[str] = None,
dataset_reference: Optional[Dict] = None,
exists_ok: bool = False
): ...
def insert_job(
self,
configuration: Dict,
project_id: str,
location: Optional[str] = None,
job_id: Optional[str] = None,
timeout: Optional[float] = None,
retry: Optional[Retry] = None,
nowait: bool = False
): ...
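The configuration argument of insert_job mirrors the BigQuery REST API's JobConfiguration resource. A minimal sketch of a query-job payload (project, dataset, and SQL are placeholders):

# JobConfiguration payload for a query job, as accepted by insert_job().
# Project, dataset, table, and SQL below are placeholders.
configuration = {
    "query": {
        "query": "SELECT COUNT(*) FROM `my-project.my_dataset.events`",
        "useLegacySql": False,
        "destinationTable": {
            "projectId": "my-project",
            "datasetId": "my_dataset",
            "tableId": "event_counts",
        },
        "writeDisposition": "WRITE_TRUNCATE",
    }
}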
class BigQueryAsyncHook(GoogleBaseAsyncHook):
def __init__(
self,
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
async def get_job_status(self, job_id: str, project_id: str, location: str): ...
class BigQueryCreateDatasetOperator(BaseOperator):
def __init__(
self,
dataset_id: str,
project_id: Optional[str] = None,
dataset_reference: Optional[Dict] = None,
location: Optional[str] = None,
gcp_conn_id: str = "google_cloud_default",
exists_ok: bool = False,
**kwargs
): ...
class BigQueryCreateEmptyTableOperator(BaseOperator):
def __init__(
self,
dataset_id: str,
table_id: str,
project_id: Optional[str] = None,
schema_fields: Optional[List] = None,
gcs_schema_object: Optional[str] = None,
time_partitioning: Optional[Dict] = None,
bigquery_conn_id: str = "google_cloud_default",
**kwargs
): ...
class BigQueryInsertJobOperator(BaseOperator):
def __init__(
self,
configuration: Dict,
project_id: Optional[str] = None,
location: Optional[str] = None,
job_id: Optional[str] = None,
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
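A minimal DAG sketch wiring these operators together. Class names follow the stubs above; the module path, project, dataset, and SQL are assumptions or placeholders:

from datetime import datetime

from airflow import DAG
# Module path is an assumption; class names follow the stubs above.
from airflow.providers.google.cloud.operators.bigquery import (
    BigQueryCreateDatasetOperator,
    BigQueryInsertJobOperator,
)

# Airflow 2.4+ style `schedule` argument.
with DAG("bq_example", start_date=datetime(2024, 1, 1), schedule=None) as dag:
    create_dataset = BigQueryCreateDatasetOperator(
        task_id="create_dataset",
        dataset_id="analytics",
        location="US",
    )
    run_query = BigQueryInsertJobOperator(
        task_id="run_query",
        configuration={
            "query": {
                "query": "SELECT 1",
                "useLegacySql": False,
            }
        },
    )
    create_dataset >> run_query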
class BigQueryTableExistenceSensor(BaseSensorOperator):
def __init__(
self,
project_id: str,
dataset_id: str,
table_id: str,
bigquery_conn_id: str = "google_cloud_default",
**kwargs
): ...
Cloud Storage (GCS): object storage service integration for bucket and object management, with support for lifecycle policies and access controls.
class GCSHook(GoogleBaseHook):
def __init__(
self,
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
def get_conn(self): ...
def list(
self,
bucket_name: str,
versions: Optional[bool] = None,
max_results: Optional[int] = None,
prefix: Optional[str] = None,
delimiter: Optional[str] = None
): ...
def exists(self, bucket_name: str, object_name: str): ...
def upload(
self,
bucket_name: str,
object_name: str,
filename: Optional[str] = None,
data: Optional[Union[str, bytes]] = None,
mime_type: Optional[str] = None,
gzip: bool = False,
encoding: str = "utf-8",
chunk_size: Optional[int] = None,
timeout: Optional[float] = None,
num_max_attempts: int = 1
): ...
def download(
self,
bucket_name: str,
object_name: str,
filename: Optional[str] = None,
chunk_size: int = 104857600,
timeout: Optional[float] = None,
num_max_attempts: int = 1
): ...
class GCSCreateBucketOperator(BaseOperator):
def __init__(
self,
bucket_name: str,
project_id: Optional[str] = None,
storage_class: str = "MULTI_REGIONAL",
location: str = "US",
labels: Optional[Dict] = None,
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
class GCSDeleteBucketOperator(BaseOperator):
def __init__(
self,
bucket_name: str,
force: bool = True,
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
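A sketch that provisions a bucket and then uploads a file through GCSHook inside a TaskFlow task. Bucket name and paths are placeholders; module paths are assumptions:

from datetime import datetime

from airflow import DAG
from airflow.decorators import task
# Module paths are assumptions; class names follow the stubs above.
from airflow.providers.google.cloud.hooks.gcs import GCSHook
from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator

with DAG("gcs_example", start_date=datetime(2024, 1, 1), schedule=None) as dag:
    create_bucket = GCSCreateBucketOperator(
        task_id="create_bucket",
        bucket_name="my-staging-bucket",
        storage_class="STANDARD",
        location="US",
    )

    @task
    def upload_report() -> None:
        # Upload a local file; data= could be passed instead of filename=.
        hook = GCSHook(gcp_conn_id="google_cloud_default")
        hook.upload(
            bucket_name="my-staging-bucket",
            object_name="reports/daily.csv",
            filename="/tmp/daily.csv",
            mime_type="text/csv",
        )

    create_bucket >> upload_report()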
class GCSObjectExistenceSensor(BaseSensorOperator):
def __init__(
self,
bucket: str,
object: str,
google_cloud_conn_id: str = "google_cloud_default",
**kwargs
): ...
Dataproc: managed Apache Spark and Hadoop service integration for big data processing.
class DataprocHook(GoogleBaseHook):
def __init__(
self,
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
def create_cluster(
self,
project_id: str,
region: str,
cluster_name: str,
cluster_config: Dict,
labels: Optional[Dict] = None,
request_id: Optional[str] = None,
retry: Optional[Retry] = None,
timeout: Optional[float] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = None
): ...
def delete_cluster(
self,
project_id: str,
region: str,
cluster_name: str,
cluster_uuid: Optional[str] = None,
request_id: Optional[str] = None,
retry: Optional[Retry] = None,
timeout: Optional[float] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = None
): ...
def submit_job(
self,
project_id: str,
region: str,
job: Dict,
request_id: Optional[str] = None,
retry: Optional[Retry] = None,
timeout: Optional[float] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = None
): ...
class DataprocCreateClusterOperator(BaseOperator):
def __init__(
self,
cluster_name: str,
project_id: Optional[str] = None,
region: str = "global",
cluster_config: Optional[Dict] = None,
labels: Optional[Dict] = None,
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
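A sketch of a create-then-submit pipeline using this operator together with DataprocSubmitJobOperator (whose stub follows just below). The job dict follows the Dataproc API's PySpark job shape; names, region, and paths are placeholders, and the module path is an assumption:

from datetime import datetime

from airflow import DAG
# Module path is an assumption; class names follow the stubs in this section.
from airflow.providers.google.cloud.operators.dataproc import (
    DataprocCreateClusterOperator,
    DataprocSubmitJobOperator,
)

CLUSTER_CONFIG = {
    # Small placeholder cluster sizing.
    "master_config": {"num_instances": 1, "machine_type_uri": "n1-standard-2"},
    "worker_config": {"num_instances": 2, "machine_type_uri": "n1-standard-2"},
}

with DAG("dataproc_example", start_date=datetime(2024, 1, 1), schedule=None) as dag:
    create_cluster = DataprocCreateClusterOperator(
        task_id="create_cluster",
        cluster_name="etl-cluster",
        region="us-central1",
        cluster_config=CLUSTER_CONFIG,
    )
    submit_job = DataprocSubmitJobOperator(
        task_id="submit_job",
        region="us-central1",
        job={
            "placement": {"cluster_name": "etl-cluster"},
            "pyspark_job": {"main_python_file_uri": "gs://my-bucket/jobs/etl.py"},
        },
    )
    create_cluster >> submit_job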
class DataprocSubmitJobOperator(BaseOperator):
def __init__(
self,
job: Dict,
project_id: Optional[str] = None,
region: str = "global",
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
Dataflow: stream and batch data processing service integration using Apache Beam.
class DataflowHook(GoogleBaseHook):
def __init__(
self,
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
def start_java_dataflow(
self,
job_name: str,
variables: Dict,
jar: str,
project_id: str,
job_class: Optional[str] = None,
append_job_name: bool = True,
multiple_jobs: bool = False,
on_new_job_id_callback: Optional[Callable[[str], None]] = None,
location: str = DEFAULT_DATAFLOW_LOCATION
): ...
def start_python_dataflow(
self,
job_name: str,
variables: Dict,
dataflow: str,
py_options: List[str],
project_id: str,
append_job_name: bool = True,
py_interpreter: str = "python3",
py_requirements: Optional[List[str]] = None,
py_system_site_packages: bool = False,
location: str = DEFAULT_DATAFLOW_LOCATION
): ...
class DataflowCreateJavaJobOperator(BaseOperator):
def __init__(
self,
jar: str,
job_name: str = "{{task.task_id}}",
dataflow_default_options: Optional[Dict] = None,
options: Optional[Dict] = None,
project_id: Optional[str] = None,
location: str = DEFAULT_DATAFLOW_LOCATION,
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
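A sketch submitting a packaged Beam pipeline with the Java operator above. The jar path, project, and pipeline options are placeholders; the module path is an assumption:

from datetime import datetime

from airflow import DAG
# Module path is an assumption; the class name follows the stub above.
from airflow.providers.google.cloud.operators.dataflow import DataflowCreateJavaJobOperator

with DAG("dataflow_example", start_date=datetime(2024, 1, 1), schedule=None) as dag:
    run_pipeline = DataflowCreateJavaJobOperator(
        task_id="run_pipeline",
        jar="gs://my-bucket/pipelines/wordcount.jar",
        # job_name is templated, like the "{{task.task_id}}" default above.
        job_name="wordcount-{{ ds_nodash }}",
        location="us-central1",
        dataflow_default_options={
            "project": "my-gcp-project",
            "tempLocation": "gs://my-bucket/tmp/",
        },
        options={"inputFile": "gs://my-bucket/input.txt"},
    )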
class DataflowCreatePythonJobOperator(BaseOperator):
def __init__(
self,
py_file: str,
job_name: str = "{{task.task_id}}",
dataflow_default_options: Optional[Dict] = None,
options: Optional[Dict] = None,
py_interpreter: str = "python3",
py_options: Optional[List[str]] = None,
py_requirements: Optional[List[str]] = None,
py_system_site_packages: bool = False,
project_id: Optional[str] = None,
location: str = DEFAULT_DATAFLOW_LOCATION,
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
Vertex AI: Google Cloud's unified ML platform integration for training, deploying, and managing machine learning models.
class VertexAIHook(GoogleBaseHook):
def __init__(
self,
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
def create_custom_training_job(
self,
project_id: str,
region: str,
display_name: str,
script_path: str,
container_uri: str,
requirements: Optional[Sequence[str]] = None,
model_serving_container_uri: Optional[str] = None,
model_serving_container_predict_route: Optional[str] = None,
model_serving_container_health_route: Optional[str] = None,
model_serving_container_command: Optional[Sequence[str]] = None,
model_serving_container_args: Optional[Sequence[str]] = None,
model_serving_container_environment_variables: Optional[Dict[str, str]] = None,
model_serving_container_ports: Optional[Sequence[Dict[str, str]]] = None,
model_description: Optional[str] = None,
model_instance_schema_uri: Optional[str] = None,
model_parameters_schema_uri: Optional[str] = None,
model_prediction_schema_uri: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
training_encryption_spec_key_name: Optional[str] = None,
model_encryption_spec_key_name: Optional[str] = None,
staging_bucket: Optional[str] = None,
**kwargs
): ...
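A sketch of launching a custom training job with the CreateCustomTrainingJobOperator stub that follows. All names and container URIs are placeholders; the module path is an assumption:

from datetime import datetime

from airflow import DAG
# Module path is an assumption; the class name follows the stub below.
from airflow.providers.google.cloud.operators.vertex_ai.custom_job import (
    CreateCustomTrainingJobOperator,
)

with DAG("vertex_ai_example", start_date=datetime(2024, 1, 1), schedule=None) as dag:
    train_model = CreateCustomTrainingJobOperator(
        task_id="train_model",
        project_id="my-gcp-project",
        region="us-central1",
        display_name="tabular-train",
        script_path="trainer/task.py",
        # Placeholder prebuilt training and serving containers.
        container_uri="us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-12:latest",
        requirements=["pandas==2.1.0"],
        model_serving_container_uri=(
            "us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-12:latest"
        ),
    )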
class CreateCustomTrainingJobOperator(BaseOperator):
def __init__(
self,
project_id: str,
region: str,
display_name: str,
script_path: str,
container_uri: str,
model_serving_container_uri: Optional[str] = None,
requirements: Optional[Sequence[str]] = None,
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
Pub/Sub: messaging service integration for real-time event streaming and asynchronous communication.
class PubSubHook(GoogleBaseHook):
def __init__(
self,
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
def create_topic(
self,
project_id: str,
topic: str,
labels: Optional[Dict[str, str]] = None,
message_retention_duration: Optional[str] = None,
kms_key_name: Optional[str] = None,
schema_settings: Optional[Dict] = None,
message_storage_policy: Optional[Dict] = None,
retry: Optional[Retry] = None,
timeout: Optional[float] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = None
): ...
def create_subscription(
self,
project_id: str,
topic: str,
subscription: str,
subscription_project_id: Optional[str] = None,
ack_deadline_secs: int = 10,
fail_if_exists: bool = False,
push_config: Optional[Dict] = None,
retain_acked_messages: Optional[bool] = None,
message_retention_duration: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
enable_message_ordering: bool = False,
expiration_policy: Optional[Dict] = None,
filter_: Optional[str] = None,
dead_letter_policy: Optional[Dict] = None,
retry_policy: Optional[Dict] = None,
retry: Optional[Retry] = None,
timeout: Optional[float] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = None
): ...
def publish(
self,
project_id: str,
topic: str,
messages: List[Dict],
retry: Optional[Retry] = None,
timeout: Optional[float] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = None
): ...
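A sketch that creates a topic and publishes through the hook; each message dict carries a bytes payload under the "data" key plus optional string "attributes". Project and topic names are placeholders; the module path is an assumption:

from datetime import datetime

from airflow import DAG
from airflow.decorators import task
# Module path is an assumption; the class name follows the stubs above.
from airflow.providers.google.cloud.hooks.pubsub import PubSubHook

with DAG("pubsub_example", start_date=datetime(2024, 1, 1), schedule=None) as dag:

    @task
    def notify() -> None:
        hook = PubSubHook(gcp_conn_id="google_cloud_default")
        # create_topic may raise if the topic already exists.
        hook.create_topic(project_id="my-gcp-project", topic="etl-events")
        hook.publish(
            project_id="my-gcp-project",
            topic="etl-events",
            messages=[{"data": b"run-complete", "attributes": {"source": "airflow"}}],
        )

    notify()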
class PubSubPullSensor(BaseSensorOperator):
def __init__(
self,
project_id: str,
subscription: str,
max_messages: int = 5,
ack_messages: bool = False,
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
Cloud SQL: managed relational database service integration supporting MySQL, PostgreSQL, and SQL Server.
class CloudSQLHook(GoogleBaseHook):
def __init__(
self,
api_version: str = "v1beta4",
gcp_conn_id: str = "google_cloud_default",
**kwargs
): ...
def create_instance(
self,
project_id: str,
body: Dict,
retry: Optional[Retry] = None,
timeout: Optional[float] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = None
): ...
def patch_instance(
self,
project_id: str,
body: Dict,
instance: str,
retry: Optional[Retry] = None,
timeout: Optional[float] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = None
): ...
def delete_instance(
self,
project_id: str,
instance: str,
retry: Optional[Retry] = None,
timeout: Optional[float] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = None
): ...
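The body dicts mirror Cloud SQL Admin API resources. A sketch provisioning an instance and a database through the hook, using create_database whose stub follows below (project, instance name, and tier are placeholders; the module path is an assumption):

from datetime import datetime

from airflow import DAG
from airflow.decorators import task
# Module path is an assumption; the class name follows the stubs above.
from airflow.providers.google.cloud.hooks.cloud_sql import CloudSQLHook

with DAG("cloudsql_example", start_date=datetime(2024, 1, 1), schedule=None) as dag:

    @task
    def provision() -> None:
        hook = CloudSQLHook(api_version="v1beta4", gcp_conn_id="google_cloud_default")
        # Body mirrors the Cloud SQL Admin API's DatabaseInstance resource.
        hook.create_instance(
            project_id="my-gcp-project",
            body={
                "name": "reporting-db",
                "region": "us-central1",
                "databaseVersion": "POSTGRES_15",
                "settings": {"tier": "db-custom-2-7680"},
            },
        )
        # Body mirrors the Database resource.
        hook.create_database(
            project_id="my-gcp-project",
            instance="reporting-db",
            body={"name": "analytics"},
        )

    provision()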
def create_database(
self,
project_id: str,
instance: str,
body: Dict,
retry: Optional[Retry] = None,
timeout: Optional[float] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = None
): ...
from typing import Dict, List, Optional, Union, Any, Sequence, Callable
from google.api_core.retry import Retry
from airflow.models import BaseOperator
from airflow.sensors.base import BaseSensorOperator
# BigQuery specific types
BigQueryJob = Dict[str, Any]
BigQueryTable = Dict[str, Any]
BigQuerySchema = List[Dict[str, str]]
# GCS specific types
GCSObject = Dict[str, Any]
GCSBucket = Dict[str, Any]
# Dataproc specific types
DataprocCluster = Dict[str, Any]
DataprocJob = Dict[str, Any]
# Common GCP types
GcpResource = Dict[str, Any]
ResourceLabels = Dict[str, str]
OperationResult = Dict[str, Any]
Install with Tessl CLI
npx tessl i tessl/pypi-apache-airflow-providers-google