# Google Cloud Platform Services

Comprehensive integration with Google Cloud Platform providing complete CRUD operations, batch processing, real-time streaming, and machine learning capabilities across 40+ Google Cloud services.

## Capabilities

### BigQuery

Data warehouse and analytics platform integration with support for datasets, tables, jobs, and complex queries.
```python { .api }
class BigQueryHook(GoogleBaseHook):
    def __init__(
        self,
        gcp_conn_id: str = "google_cloud_default",
        use_legacy_sql: bool = True,
        location: Optional[str] = None,
        priority: str = "INTERACTIVE",
        job_id: Optional[str] = None,
        job_retry: Optional[Retry] = None,
        job_timeout: Optional[float] = None,
        **kwargs
    ): ...

    def get_service(self): ...
    def create_empty_table(
        self,
        project_id: str,
        dataset_id: str,
        table_id: str,
        schema_fields: List[Dict] = None,
        time_partitioning: Dict = None,
        cluster_fields: List[str] = None,
        labels: Dict = None,
        view: Dict = None,
        materialized_view: Dict = None,
        encryption_configuration: Dict = None,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        exists_ok: bool = False
    ): ...
    def create_empty_dataset(
        self,
        dataset_id: str,
        project_id: str,
        location: Optional[str] = None,
        dataset_reference: Optional[Dict] = None,
        exists_ok: bool = False
    ): ...
    def insert_job(
        self,
        configuration: Dict,
        project_id: str,
        location: Optional[str] = None,
        job_id: Optional[str] = None,
        timeout: Optional[float] = None,
        retry: Optional[Retry] = None,
        nowait: bool = False
    ): ...

class BigQueryAsyncHook(GoogleBaseAsyncHook):
    def __init__(
        self,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...
    async def get_job_status(self, job_id: str, project_id: str, location: str): ...

class BigQueryCreateDatasetOperator(BaseOperator):
    def __init__(
        self,
        dataset_id: str,
        project_id: Optional[str] = None,
        dataset_reference: Optional[Dict] = None,
        location: Optional[str] = None,
        gcp_conn_id: str = "google_cloud_default",
        exists_ok: bool = False,
        **kwargs
    ): ...

class BigQueryCreateEmptyTableOperator(BaseOperator):
    def __init__(
        self,
        dataset_id: str,
        table_id: str,
        project_id: Optional[str] = None,
        schema_fields: Optional[List] = None,
        gcs_schema_object: Optional[str] = None,
        time_partitioning: Optional[Dict] = None,
        bigquery_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

class BigQueryInsertJobOperator(BaseOperator):
    def __init__(
        self,
        configuration: Dict,
        project_id: Optional[str] = None,
        location: Optional[str] = None,
        job_id: Optional[str] = None,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

class BigQueryTableExistenceSensor(BaseSensorOperator):
    def __init__(
        self,
        project_id: str,
        dataset_id: str,
        table_id: str,
        bigquery_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...
```
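
The DAG sketch below shows how the operators above fit together: create an empty table, then run a query job against it. This is a minimal sketch, not a drop-in example; import paths assume the standard `airflow.providers.google.cloud` modules, and the project, dataset, and table names are placeholders.

```python
# Minimal sketch: import paths assume the standard airflow.providers.google.cloud
# modules; project/dataset/table names are placeholders.
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.bigquery import (
    BigQueryCreateEmptyTableOperator,
    BigQueryInsertJobOperator,
)

with DAG("bigquery_example", start_date=datetime(2024, 1, 1), schedule_interval=None, catchup=False) as dag:
    create_table = BigQueryCreateEmptyTableOperator(
        task_id="create_table",
        dataset_id="analytics",
        table_id="events",
        schema_fields=[
            {"name": "event_id", "type": "STRING", "mode": "REQUIRED"},
            {"name": "ts", "type": "TIMESTAMP", "mode": "NULLABLE"},
        ],
    )

    run_query = BigQueryInsertJobOperator(
        task_id="run_query",
        configuration={
            "query": {
                "query": "SELECT COUNT(*) FROM `my-project.analytics.events`",
                "useLegacySql": False,
            }
        },
    )

    create_table >> run_query  # table must exist before the query job runs
```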

### Google Cloud Storage (GCS)

Object storage service integration for bucket and object management, with support for lifecycle policies and access controls.
```python { .api }
class GCSHook(GoogleBaseHook):
    def __init__(
        self,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

    def get_conn(self): ...
    def list(
        self,
        bucket_name: str,
        versions: Optional[bool] = None,
        max_results: Optional[int] = None,
        prefix: Optional[str] = None,
        delimiter: Optional[str] = None
    ): ...
    def exists(self, bucket_name: str, object_name: str): ...
    def upload(
        self,
        bucket_name: str,
        object_name: str,
        filename: Optional[str] = None,
        data: Optional[Union[str, bytes]] = None,
        mime_type: Optional[str] = None,
        gzip: bool = False,
        encoding: str = "utf-8",
        chunk_size: Optional[int] = None,
        timeout: Optional[float] = None,
        num_max_attempts: int = 1
    ): ...
    def download(
        self,
        bucket_name: str,
        object_name: str,
        filename: Optional[str] = None,
        chunk_size: int = 104857600,
        timeout: Optional[float] = None,
        num_max_attempts: int = 1
    ): ...

class GCSCreateBucketOperator(BaseOperator):
    def __init__(
        self,
        bucket_name: str,
        project_id: Optional[str] = None,
        storage_class: str = "MULTI_REGIONAL",
        location: str = "US",
        labels: Optional[Dict] = None,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

class GCSDeleteBucketOperator(BaseOperator):
    def __init__(
        self,
        bucket_name: str,
        force: bool = True,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

class GCSObjectExistenceSensor(BaseSensorOperator):
    def __init__(
        self,
        bucket: str,
        object: str,
        google_cloud_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...
```
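
As a usage sketch, the hook methods above can be called directly from a task callable, for example to upload a file and confirm it landed. The bucket and object names below are placeholders, and the import path assumes the standard provider module.

```python
# Sketch assuming the standard provider import path; bucket/object names are placeholders.
from airflow.providers.google.cloud.hooks.gcs import GCSHook


def upload_report(local_path: str) -> None:
    """Upload a local file gzip-compressed, retrying up to three times, then verify it exists."""
    hook = GCSHook(gcp_conn_id="google_cloud_default")
    hook.upload(
        bucket_name="example-bucket",
        object_name="reports/daily.csv.gz",
        filename=local_path,
        gzip=True,
        num_max_attempts=3,
    )
    if not hook.exists("example-bucket", "reports/daily.csv.gz"):
        raise RuntimeError("upload did not complete")
```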

### Dataproc

Managed Apache Spark and Hadoop service integration for big data processing.
```python { .api }
class DataprocHook(GoogleBaseHook):
    def __init__(
        self,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

    def create_cluster(
        self,
        project_id: str,
        region: str,
        cluster_name: str,
        cluster_config: Dict,
        labels: Optional[Dict] = None,
        request_id: Optional[str] = None,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
    def delete_cluster(
        self,
        project_id: str,
        region: str,
        cluster_name: str,
        cluster_uuid: Optional[str] = None,
        request_id: Optional[str] = None,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
    def submit_job(
        self,
        project_id: str,
        region: str,
        job: Dict,
        request_id: Optional[str] = None,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...

class DataprocCreateClusterOperator(BaseOperator):
    def __init__(
        self,
        cluster_name: str,
        project_id: Optional[str] = None,
        region: str = "global",
        cluster_config: Optional[Dict] = None,
        labels: Optional[Dict] = None,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

class DataprocSubmitJobOperator(BaseOperator):
    def __init__(
        self,
        job: Dict,
        project_id: Optional[str] = None,
        region: str = "global",
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...
```
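
A typical pattern with the operators above is to create a cluster, submit a PySpark job to it, and tear the cluster down afterwards. The sketch below covers the first two steps; the cluster config, project, region, and GCS paths are placeholder assumptions, and import paths assume the standard `airflow.providers.google.cloud` modules.

```python
# Sketch only: cluster config, project, region, and GCS paths are placeholders;
# import paths assume the standard airflow.providers.google.cloud modules.
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.dataproc import (
    DataprocCreateClusterOperator,
    DataprocSubmitJobOperator,
)

CLUSTER_CONFIG = {
    "master_config": {"num_instances": 1, "machine_type_uri": "n1-standard-2"},
    "worker_config": {"num_instances": 2, "machine_type_uri": "n1-standard-2"},
}

PYSPARK_JOB = {
    "reference": {"project_id": "my-project"},
    "placement": {"cluster_name": "etl-cluster"},
    "pyspark_job": {"main_python_file_uri": "gs://example-bucket/jobs/transform.py"},
}

with DAG("dataproc_example", start_date=datetime(2024, 1, 1), schedule_interval=None, catchup=False) as dag:
    create_cluster = DataprocCreateClusterOperator(
        task_id="create_cluster",
        cluster_name="etl-cluster",
        project_id="my-project",
        region="us-central1",
        cluster_config=CLUSTER_CONFIG,
    )

    submit_job = DataprocSubmitJobOperator(
        task_id="submit_job",
        job=PYSPARK_JOB,
        project_id="my-project",
        region="us-central1",
    )

    create_cluster >> submit_job
```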

### Dataflow

Stream and batch data processing service integration using Apache Beam.
```python { .api }
class DataflowHook(GoogleBaseHook):
    def __init__(
        self,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

    def start_java_dataflow(
        self,
        job_name: str,
        variables: Dict,
        jar: str,
        project_id: str,
        job_class: Optional[str] = None,
        append_job_name: bool = True,
        multiple_jobs: bool = False,
        on_new_job_id_callback: Optional[Callable[[str], None]] = None,
        location: str = DEFAULT_DATAFLOW_LOCATION
    ): ...
    def start_python_dataflow(
        self,
        job_name: str,
        variables: Dict,
        dataflow: str,
        py_options: List[str],
        project_id: str,
        append_job_name: bool = True,
        py_interpreter: str = "python3",
        py_requirements: Optional[List[str]] = None,
        py_system_site_packages: bool = False,
        location: str = DEFAULT_DATAFLOW_LOCATION
    ): ...

class DataflowCreateJavaJobOperator(BaseOperator):
    def __init__(
        self,
        jar: str,
        job_name: str = "{{task.task_id}}",
        dataflow_default_options: Optional[Dict] = None,
        options: Optional[Dict] = None,
        project_id: Optional[str] = None,
        location: str = DEFAULT_DATAFLOW_LOCATION,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

class DataflowCreatePythonJobOperator(BaseOperator):
    def __init__(
        self,
        py_file: str,
        job_name: str = "{{task.task_id}}",
        dataflow_default_options: Optional[Dict] = None,
        options: Optional[Dict] = None,
        py_interpreter: str = "python3",
        py_options: Optional[List[str]] = None,
        py_requirements: Optional[List[str]] = None,
        py_system_site_packages: bool = False,
        project_id: Optional[str] = None,
        location: str = DEFAULT_DATAFLOW_LOCATION,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...
```
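
The sketch below launches an Apache Beam pipeline written in Python on Dataflow using the operator above. The pipeline file, bucket paths, option keys, and region are placeholder assumptions; the import path assumes the standard provider module.

```python
# Sketch only: pipeline file, bucket paths, and region are placeholders;
# import path assumes the standard airflow.providers.google.cloud modules.
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.dataflow import DataflowCreatePythonJobOperator

with DAG("dataflow_example", start_date=datetime(2024, 1, 1), schedule_interval=None, catchup=False) as dag:
    run_pipeline = DataflowCreatePythonJobOperator(
        task_id="run_pipeline",
        py_file="gs://example-bucket/pipelines/wordcount.py",
        job_name="wordcount-{{ ds_nodash }}",
        py_interpreter="python3",
        py_requirements=["apache-beam[gcp]"],
        options={
            "temp_location": "gs://example-bucket/tmp/",
            "staging_location": "gs://example-bucket/staging/",
        },
        location="us-central1",
    )
```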

### Vertex AI

Google Cloud's unified ML platform integration for training, deployment, and management of machine learning models.
```python { .api }
class VertexAIHook(GoogleBaseHook):
    def __init__(
        self,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

    def create_custom_training_job(
        self,
        project_id: str,
        region: str,
        display_name: str,
        script_path: str,
        container_uri: str,
        requirements: Optional[Sequence[str]] = None,
        model_serving_container_uri: Optional[str] = None,
        model_serving_container_predict_route: Optional[str] = None,
        model_serving_container_health_route: Optional[str] = None,
        model_serving_container_command: Optional[Sequence[str]] = None,
        model_serving_container_args: Optional[Sequence[str]] = None,
        model_serving_container_environment_variables: Optional[Dict[str, str]] = None,
        model_serving_container_ports: Optional[Sequence[Dict[str, str]]] = None,
        model_description: Optional[str] = None,
        model_instance_schema_uri: Optional[str] = None,
        model_parameters_schema_uri: Optional[str] = None,
        model_prediction_schema_uri: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
        training_encryption_spec_key_name: Optional[str] = None,
        model_encryption_spec_key_name: Optional[str] = None,
        staging_bucket: Optional[str] = None,
        **kwargs
    ): ...

class CreateCustomTrainingJobOperator(BaseOperator):
    def __init__(
        self,
        project_id: str,
        region: str,
        display_name: str,
        script_path: str,
        container_uri: str,
        model_serving_container_uri: Optional[str] = None,
        requirements: Optional[Sequence[str]] = None,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...
```
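
As a sketch, the operator above can launch a custom training job from a local training script packaged into a prebuilt training container. The project, region, script path, and container image URIs below are placeholder assumptions, as is the import path.

```python
# Sketch only: project, region, script path, and container image URIs are placeholders;
# import path assumes the standard airflow.providers.google.cloud Vertex AI module.
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.vertex_ai.custom_job import (
    CreateCustomTrainingJobOperator,
)

with DAG("vertex_ai_example", start_date=datetime(2024, 1, 1), schedule_interval=None, catchup=False) as dag:
    train_model = CreateCustomTrainingJobOperator(
        task_id="train_model",
        project_id="my-project",
        region="us-central1",
        display_name="tabular-train",
        script_path="training/train.py",
        container_uri="us-docker.pkg.dev/vertex-ai/training/sklearn-cpu.1-0:latest",
        model_serving_container_uri="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest",
        requirements=["pandas", "scikit-learn"],
    )
```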

### Pub/Sub

Messaging service integration for real-time event streaming and asynchronous communication.
```python { .api }
class PubSubHook(GoogleBaseHook):
    def __init__(
        self,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

    def create_topic(
        self,
        project_id: str,
        topic: str,
        labels: Optional[Dict[str, str]] = None,
        message_retention_duration: Optional[str] = None,
        kms_key_name: Optional[str] = None,
        schema_settings: Optional[Dict] = None,
        message_storage_policy: Optional[Dict] = None,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
    def create_subscription(
        self,
        project_id: str,
        topic: str,
        subscription: str,
        subscription_project_id: Optional[str] = None,
        ack_deadline_secs: int = 10,
        fail_if_exists: bool = False,
        push_config: Optional[Dict] = None,
        retain_acked_messages: Optional[bool] = None,
        message_retention_duration: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
        enable_message_ordering: bool = False,
        expiration_policy: Optional[Dict] = None,
        filter_: Optional[str] = None,
        dead_letter_policy: Optional[Dict] = None,
        retry_policy: Optional[Dict] = None,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
    def publish(
        self,
        project_id: str,
        topic: str,
        messages: List[Dict],
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...

class PubSubPullSensor(BaseSensorOperator):
    def __init__(
        self,
        project_id: str,
        subscription: str,
        max_messages: int = 5,
        ack_messages: bool = False,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...
```
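
The sensor above is commonly used to gate a DAG on incoming messages. The sketch below waits for up to five messages on a subscription without acknowledging them; the project and subscription names are placeholders, and the import path assumes the standard provider sensors module.

```python
# Sketch only: project and subscription names are placeholders; import path
# assumes the standard airflow.providers.google.cloud sensors module.
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.sensors.pubsub import PubSubPullSensor

with DAG("pubsub_example", start_date=datetime(2024, 1, 1), schedule_interval=None, catchup=False) as dag:
    wait_for_events = PubSubPullSensor(
        task_id="wait_for_events",
        project_id="my-project",
        subscription="orders-sub",
        max_messages=5,
        ack_messages=False,  # leave messages unacked so downstream consumers still see them
        poke_interval=30,    # seconds between pulls (BaseSensorOperator parameter)
    )
```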

### Cloud SQL

Managed relational database service integration supporting MySQL, PostgreSQL, and SQL Server.
```python { .api }
class CloudSQLHook(GoogleBaseHook):
    def __init__(
        self,
        api_version: str = "v1beta4",
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

    def create_instance(
        self,
        project_id: str,
        body: Dict,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
    def patch_instance(
        self,
        project_id: str,
        body: Dict,
        instance: str,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
    def delete_instance(
        self,
        project_id: str,
        instance: str,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
    def create_database(
        self,
        project_id: str,
        instance: str,
        body: Dict,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
```
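
The hook methods above take request bodies in the Cloud SQL Admin API (v1beta4) resource format. A provisioning sketch, with placeholder project, instance, and database names:

```python
# Sketch only: instance, database, and project names are placeholders; the body
# dictionaries follow the Cloud SQL Admin API v1beta4 resource format.
from airflow.providers.google.cloud.hooks.cloud_sql import CloudSQLHook


def provision_postgres(project_id: str = "my-project") -> None:
    """Create a PostgreSQL instance, then a database inside it."""
    hook = CloudSQLHook(api_version="v1beta4", gcp_conn_id="google_cloud_default")
    hook.create_instance(
        project_id=project_id,
        body={
            "name": "orders-db",
            "databaseVersion": "POSTGRES_14",
            "region": "us-central1",
            "settings": {"tier": "db-custom-2-7680"},
        },
    )
    hook.create_database(
        project_id=project_id,
        instance="orders-db",
        body={"name": "orders", "instance": "orders-db", "project": project_id},
    )
```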

## Types
```python { .api }
from typing import Dict, List, Optional, Union, Any, Sequence, Callable, Tuple
from google.api_core.retry import Retry
from airflow.models import BaseOperator
from airflow.sensors.base import BaseSensorOperator

# BigQuery specific types
BigQueryJob = Dict[str, Any]
BigQueryTable = Dict[str, Any]
BigQuerySchema = List[Dict[str, str]]

# GCS specific types
GCSObject = Dict[str, Any]
GCSBucket = Dict[str, Any]

# Dataproc specific types
DataprocCluster = Dict[str, Any]
DataprocJob = Dict[str, Any]

# Common GCP types
GcpResource = Dict[str, Any]
ResourceLabels = Dict[str, str]
OperationResult = Dict[str, Any]
```