# Google Cloud Platform Services

Comprehensive integration with Google Cloud Platform, providing CRUD operations, batch processing, real-time streaming, and machine learning capabilities across 40+ Google Cloud services.

## Capabilities

### BigQuery

Data warehouse and analytics platform integration with support for datasets, tables, jobs, and complex queries.

```python { .api }
class BigQueryHook(GoogleBaseHook):
    def __init__(
        self,
        gcp_conn_id: str = "google_cloud_default",
        use_legacy_sql: bool = True,
        location: Optional[str] = None,
        priority: str = "INTERACTIVE",
        job_id: Optional[str] = None,
        job_retry: Optional[Retry] = None,
        job_timeout: Optional[float] = None,
        **kwargs
    ): ...

    def get_service(self): ...
    def create_empty_table(
        self,
        project_id: str,
        dataset_id: str,
        table_id: str,
        schema_fields: Optional[List[Dict]] = None,
        time_partitioning: Optional[Dict] = None,
        cluster_fields: Optional[List[str]] = None,
        labels: Optional[Dict] = None,
        view: Optional[Dict] = None,
        materialized_view: Optional[Dict] = None,
        encryption_configuration: Optional[Dict] = None,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        exists_ok: bool = False
    ): ...
    def create_empty_dataset(
        self,
        dataset_id: str,
        project_id: str,
        location: Optional[str] = None,
        dataset_reference: Optional[Dict] = None,
        exists_ok: bool = False
    ): ...
    def insert_job(
        self,
        configuration: Dict,
        project_id: str,
        location: Optional[str] = None,
        job_id: Optional[str] = None,
        timeout: Optional[float] = None,
        retry: Optional[Retry] = None,
        nowait: bool = False
    ): ...

class BigQueryAsyncHook(GoogleBaseAsyncHook):
    def __init__(
        self,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

    async def get_job_status(self, job_id: str, project_id: str, location: str): ...

class BigQueryCreateDatasetOperator(BaseOperator):
    def __init__(
        self,
        dataset_id: str,
        project_id: Optional[str] = None,
        dataset_reference: Optional[Dict] = None,
        location: Optional[str] = None,
        gcp_conn_id: str = "google_cloud_default",
        exists_ok: bool = False,
        **kwargs
    ): ...

class BigQueryCreateEmptyTableOperator(BaseOperator):
    def __init__(
        self,
        dataset_id: str,
        table_id: str,
        project_id: Optional[str] = None,
        schema_fields: Optional[List] = None,
        gcs_schema_object: Optional[str] = None,
        time_partitioning: Optional[Dict] = None,
        bigquery_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

class BigQueryInsertJobOperator(BaseOperator):
    def __init__(
        self,
        configuration: Dict,
        project_id: Optional[str] = None,
        location: Optional[str] = None,
        job_id: Optional[str] = None,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

class BigQueryTableExistenceSensor(BaseSensorOperator):
    def __init__(
        self,
        project_id: str,
        dataset_id: str,
        table_id: str,
        bigquery_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...
```
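
A minimal usage sketch (not part of the API surface above): it wires `BigQueryCreateEmptyTableOperator` and `BigQueryInsertJobOperator` into a small DAG. The import paths follow the standard Airflow Google provider layout, and the project, dataset, and table names are illustrative assumptions.

```python
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.bigquery import (
    BigQueryCreateEmptyTableOperator,
    BigQueryInsertJobOperator,
)

with DAG("bigquery_example", start_date=datetime(2024, 1, 1), schedule=None) as dag:
    # Create an empty, day-partitioned table in an existing dataset.
    create_table = BigQueryCreateEmptyTableOperator(
        task_id="create_table",
        dataset_id="analytics",
        table_id="events",
        schema_fields=[
            {"name": "event_id", "type": "STRING", "mode": "REQUIRED"},
            {"name": "ts", "type": "TIMESTAMP", "mode": "NULLABLE"},
        ],
        time_partitioning={"type": "DAY", "field": "ts"},
    )

    # Run a query as a BigQuery job; the configuration dict mirrors the
    # BigQuery REST API's JobConfiguration object.
    run_query = BigQueryInsertJobOperator(
        task_id="run_query",
        configuration={
            "query": {
                "query": "SELECT COUNT(*) FROM `my-project.analytics.events`",
                "useLegacySql": False,
            }
        },
        location="US",
    )

    create_table >> run_query
```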

### Google Cloud Storage (GCS)

Object storage service integration for bucket and object management, with support for lifecycle policies and access controls.

```python { .api }
class GCSHook(GoogleBaseHook):
    def __init__(
        self,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

    def get_conn(self): ...
    def list(
        self,
        bucket_name: str,
        versions: Optional[bool] = None,
        max_results: Optional[int] = None,
        prefix: Optional[str] = None,
        delimiter: Optional[str] = None
    ): ...
    def exists(self, bucket_name: str, object_name: str): ...
    def upload(
        self,
        bucket_name: str,
        object_name: str,
        filename: Optional[str] = None,
        data: Optional[Union[str, bytes]] = None,
        mime_type: Optional[str] = None,
        gzip: bool = False,
        encoding: str = "utf-8",
        chunk_size: Optional[int] = None,
        timeout: Optional[float] = None,
        num_max_attempts: int = 1
    ): ...
    def download(
        self,
        bucket_name: str,
        object_name: str,
        filename: Optional[str] = None,
        chunk_size: int = 104857600,
        timeout: Optional[float] = None,
        num_max_attempts: int = 1
    ): ...

class GCSCreateBucketOperator(BaseOperator):
    def __init__(
        self,
        bucket_name: str,
        project_id: Optional[str] = None,
        storage_class: str = "MULTI_REGIONAL",
        location: str = "US",
        labels: Optional[Dict] = None,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

class GCSDeleteBucketOperator(BaseOperator):
    def __init__(
        self,
        bucket_name: str,
        force: bool = True,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

class GCSObjectExistenceSensor(BaseSensorOperator):
    def __init__(
        self,
        bucket: str,
        object: str,
        google_cloud_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...
```
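
A hedged sketch showing the hook and sensor together, assuming the standard Airflow provider import paths; the bucket and object names are placeholders.

```python
from datetime import datetime

from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.providers.google.cloud.hooks.gcs import GCSHook
from airflow.providers.google.cloud.sensors.gcs import GCSObjectExistenceSensor


def upload_report(**_):
    # Upload an in-memory string as an object; a local file could be passed
    # via filename= instead of data=.
    hook = GCSHook(gcp_conn_id="google_cloud_default")
    hook.upload(
        bucket_name="my-bucket",
        object_name="reports/2024-01-01.csv",
        data="id,value\n1,42\n",
        mime_type="text/csv",
    )


with DAG("gcs_example", start_date=datetime(2024, 1, 1), schedule=None) as dag:
    upload = PythonOperator(task_id="upload", python_callable=upload_report)

    # Block downstream tasks until the object is visible in the bucket.
    wait_for_object = GCSObjectExistenceSensor(
        task_id="wait_for_object",
        bucket="my-bucket",
        object="reports/2024-01-01.csv",
        poke_interval=30,
    )

    upload >> wait_for_object
```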

### Dataproc

Managed Apache Spark and Hadoop service integration for big data processing.

```python { .api }
class DataprocHook(GoogleBaseHook):
    def __init__(
        self,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

    def create_cluster(
        self,
        project_id: str,
        region: str,
        cluster_name: str,
        cluster_config: Dict,
        labels: Optional[Dict] = None,
        request_id: Optional[str] = None,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
    def delete_cluster(
        self,
        project_id: str,
        region: str,
        cluster_name: str,
        cluster_uuid: Optional[str] = None,
        request_id: Optional[str] = None,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
    def submit_job(
        self,
        project_id: str,
        region: str,
        job: Dict,
        request_id: Optional[str] = None,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...

class DataprocCreateClusterOperator(BaseOperator):
    def __init__(
        self,
        cluster_name: str,
        project_id: Optional[str] = None,
        region: str = "global",
        cluster_config: Optional[Dict] = None,
        labels: Optional[Dict] = None,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

class DataprocSubmitJobOperator(BaseOperator):
    def __init__(
        self,
        job: Dict,
        project_id: Optional[str] = None,
        region: str = "global",
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...
```
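
A usage sketch for the operators above, assuming the standard provider import paths; the project, region, bucket, and cluster names are placeholders. The `job` dict follows the Dataproc `Job` resource shape.

```python
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.dataproc import (
    DataprocCreateClusterOperator,
    DataprocSubmitJobOperator,
)

# Minimal cluster layout: one master, two workers.
CLUSTER_CONFIG = {
    "master_config": {"num_instances": 1, "machine_type_uri": "n1-standard-4"},
    "worker_config": {"num_instances": 2, "machine_type_uri": "n1-standard-4"},
}

# A PySpark job pointed at a script staged in GCS.
PYSPARK_JOB = {
    "reference": {"project_id": "my-project"},
    "placement": {"cluster_name": "etl-cluster"},
    "pyspark_job": {"main_python_file_uri": "gs://my-bucket/jobs/transform.py"},
}

with DAG("dataproc_example", start_date=datetime(2024, 1, 1), schedule=None) as dag:
    create_cluster = DataprocCreateClusterOperator(
        task_id="create_cluster",
        cluster_name="etl-cluster",
        project_id="my-project",
        region="us-central1",
        cluster_config=CLUSTER_CONFIG,
    )

    submit_job = DataprocSubmitJobOperator(
        task_id="submit_job",
        job=PYSPARK_JOB,
        project_id="my-project",
        region="us-central1",
    )

    create_cluster >> submit_job
```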

### Dataflow

Stream and batch data processing service integration using Apache Beam.

```python { .api }
class DataflowHook(GoogleBaseHook):
    def __init__(
        self,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

    def start_java_dataflow(
        self,
        job_name: str,
        variables: Dict,
        jar: str,
        project_id: str,
        job_class: Optional[str] = None,
        append_job_name: bool = True,
        multiple_jobs: bool = False,
        on_new_job_id_callback: Optional[Callable[[str], None]] = None,
        location: str = DEFAULT_DATAFLOW_LOCATION
    ): ...
    def start_python_dataflow(
        self,
        job_name: str,
        variables: Dict,
        dataflow: str,
        py_options: List[str],
        project_id: str,
        append_job_name: bool = True,
        py_interpreter: str = "python3",
        py_requirements: Optional[List[str]] = None,
        py_system_site_packages: bool = False,
        location: str = DEFAULT_DATAFLOW_LOCATION
    ): ...

class DataflowCreateJavaJobOperator(BaseOperator):
    def __init__(
        self,
        jar: str,
        job_name: str = "{{task.task_id}}",
        dataflow_default_options: Optional[Dict] = None,
        options: Optional[Dict] = None,
        project_id: Optional[str] = None,
        location: str = DEFAULT_DATAFLOW_LOCATION,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

class DataflowCreatePythonJobOperator(BaseOperator):
    def __init__(
        self,
        py_file: str,
        job_name: str = "{{task.task_id}}",
        dataflow_default_options: Optional[Dict] = None,
        options: Optional[Dict] = None,
        py_interpreter: str = "python3",
        py_options: Optional[List[str]] = None,
        py_requirements: Optional[List[str]] = None,
        py_system_site_packages: bool = False,
        project_id: Optional[str] = None,
        location: str = DEFAULT_DATAFLOW_LOCATION,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...
```
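
A sketch of launching a Python (Apache Beam) pipeline with the operator above, assuming the standard provider import path; the GCS paths, project, and Beam version are illustrative placeholders.

```python
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.dataflow import DataflowCreatePythonJobOperator

with DAG("dataflow_example", start_date=datetime(2024, 1, 1), schedule=None) as dag:
    # Launch a Beam pipeline script staged in GCS on the Dataflow service.
    # dataflow_default_options/options are merged into the pipeline options.
    run_pipeline = DataflowCreatePythonJobOperator(
        task_id="run_pipeline",
        py_file="gs://my-bucket/pipelines/wordcount.py",
        job_name="wordcount-{{ ds_nodash }}",
        options={
            "temp_location": "gs://my-bucket/tmp",
            "staging_location": "gs://my-bucket/staging",
        },
        py_interpreter="python3",
        py_requirements=["apache-beam[gcp]"],
        project_id="my-project",
        location="us-central1",
    )
```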

### Vertex AI

Integration with Vertex AI, Google Cloud's unified ML platform, for training, deploying, and managing machine learning models.

```python { .api }
class VertexAIHook(GoogleBaseHook):
    def __init__(
        self,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

    def create_custom_training_job(
        self,
        project_id: str,
        region: str,
        display_name: str,
        script_path: str,
        container_uri: str,
        requirements: Optional[Sequence[str]] = None,
        model_serving_container_uri: Optional[str] = None,
        model_serving_container_predict_route: Optional[str] = None,
        model_serving_container_health_route: Optional[str] = None,
        model_serving_container_command: Optional[Sequence[str]] = None,
        model_serving_container_args: Optional[Sequence[str]] = None,
        model_serving_container_environment_variables: Optional[Dict[str, str]] = None,
        model_serving_container_ports: Optional[Sequence[Dict[str, str]]] = None,
        model_description: Optional[str] = None,
        model_instance_schema_uri: Optional[str] = None,
        model_parameters_schema_uri: Optional[str] = None,
        model_prediction_schema_uri: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
        training_encryption_spec_key_name: Optional[str] = None,
        model_encryption_spec_key_name: Optional[str] = None,
        staging_bucket: Optional[str] = None,
        **kwargs
    ): ...

class CreateCustomTrainingJobOperator(BaseOperator):
    def __init__(
        self,
        project_id: str,
        region: str,
        display_name: str,
        script_path: str,
        container_uri: str,
        model_serving_container_uri: Optional[str] = None,
        requirements: Optional[Sequence[str]] = None,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...
```
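
A hedged sketch of a custom training job using the operator above, assuming the standard provider import path for the Vertex AI operators; the project, container images, and script path are illustrative placeholders.

```python
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.vertex_ai.custom_job import (
    CreateCustomTrainingJobOperator,
)

with DAG("vertex_ai_example", start_date=datetime(2024, 1, 1), schedule=None) as dag:
    # Run a local training script in a prebuilt training container and register
    # the resulting model with the given serving container image.
    train_model = CreateCustomTrainingJobOperator(
        task_id="train_model",
        project_id="my-project",
        region="us-central1",
        display_name="tabular-classifier",
        script_path="training/train.py",
        container_uri="us-docker.pkg.dev/vertex-ai/training/sklearn-cpu.1-0:latest",
        model_serving_container_uri=(
            "us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest"
        ),
        requirements=["pandas", "scikit-learn"],
    )
```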

### Pub/Sub

Messaging service integration for real-time event streaming and asynchronous communication.

```python { .api }
class PubSubHook(GoogleBaseHook):
    def __init__(
        self,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

    def create_topic(
        self,
        project_id: str,
        topic: str,
        labels: Optional[Dict[str, str]] = None,
        message_retention_duration: Optional[str] = None,
        kms_key_name: Optional[str] = None,
        schema_settings: Optional[Dict] = None,
        message_storage_policy: Optional[Dict] = None,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
    def create_subscription(
        self,
        project_id: str,
        topic: str,
        subscription: str,
        subscription_project_id: Optional[str] = None,
        ack_deadline_secs: int = 10,
        fail_if_exists: bool = False,
        push_config: Optional[Dict] = None,
        retain_acked_messages: Optional[bool] = None,
        message_retention_duration: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
        enable_message_ordering: bool = False,
        expiration_policy: Optional[Dict] = None,
        filter_: Optional[str] = None,
        dead_letter_policy: Optional[Dict] = None,
        retry_policy: Optional[Dict] = None,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
    def publish(
        self,
        project_id: str,
        topic: str,
        messages: List[Dict],
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...

class PubSubPullSensor(BaseSensorOperator):
    def __init__(
        self,
        project_id: str,
        subscription: str,
        max_messages: int = 5,
        ack_messages: bool = False,
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...
```
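
A sketch that publishes with `PubSubHook` and gates a downstream task on `PubSubPullSensor`, assuming the standard provider import paths; the project, topic, and subscription names are placeholders, and the subscription is assumed to already exist.

```python
from datetime import datetime

from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.providers.google.cloud.hooks.pubsub import PubSubHook
from airflow.providers.google.cloud.sensors.pubsub import PubSubPullSensor


def publish_events(**_):
    hook = PubSubHook(gcp_conn_id="google_cloud_default")
    # Each message follows the Pub/Sub message shape: a bytes payload plus
    # optional string attributes.
    hook.publish(
        project_id="my-project",
        topic="orders",
        messages=[{"data": b'{"order_id": 1}', "attributes": {"source": "checkout"}}],
    )


with DAG("pubsub_example", start_date=datetime(2024, 1, 1), schedule=None) as dag:
    publish = PythonOperator(task_id="publish", python_callable=publish_events)

    # Wait until messages are available on the subscription, acknowledging as they are pulled.
    wait_for_message = PubSubPullSensor(
        task_id="wait_for_message",
        project_id="my-project",
        subscription="orders-sub",
        max_messages=5,
        ack_messages=True,
    )

    publish >> wait_for_message
```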

### Cloud SQL

Managed relational database service integration supporting MySQL, PostgreSQL, and SQL Server.

```python { .api }
class CloudSQLHook(GoogleBaseHook):
    def __init__(
        self,
        api_version: str = "v1beta4",
        gcp_conn_id: str = "google_cloud_default",
        **kwargs
    ): ...

    def create_instance(
        self,
        project_id: str,
        body: Dict,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
    def patch_instance(
        self,
        project_id: str,
        body: Dict,
        instance: str,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
    def delete_instance(
        self,
        project_id: str,
        instance: str,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
    def create_database(
        self,
        project_id: str,
        instance: str,
        body: Dict,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None
    ): ...
```
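
A hook-level sketch, assuming the standard provider import path; the instance and database bodies follow the Cloud SQL Admin API resource shapes, and the project, instance, and database names are placeholders.

```python
from airflow.providers.google.cloud.hooks.cloud_sql import CloudSQLHook

# Instance body per the Cloud SQL Admin API `DatabaseInstance` resource.
INSTANCE_BODY = {
    "name": "reporting-db",
    "databaseVersion": "POSTGRES_15",
    "region": "us-central1",
    "settings": {"tier": "db-custom-2-7680"},
}

# Database body per the Cloud SQL Admin API `Database` resource.
DATABASE_BODY = {"name": "analytics", "instance": "reporting-db", "project": "my-project"}

hook = CloudSQLHook(api_version="v1beta4", gcp_conn_id="google_cloud_default")
hook.create_instance(project_id="my-project", body=INSTANCE_BODY)
hook.create_database(project_id="my-project", instance="reporting-db", body=DATABASE_BODY)
```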

## Types

```python { .api }
from typing import Dict, List, Optional, Union, Any, Sequence, Callable, Tuple

from google.api_core.retry import Retry
from airflow.models import BaseOperator
from airflow.sensors.base import BaseSensorOperator

# BigQuery specific types
BigQueryJob = Dict[str, Any]
BigQueryTable = Dict[str, Any]
BigQuerySchema = List[Dict[str, str]]

# GCS specific types
GCSObject = Dict[str, Any]
GCSBucket = Dict[str, Any]

# Dataproc specific types
DataprocCluster = Dict[str, Any]
DataprocJob = Dict[str, Any]

# Common GCP types
GcpResource = Dict[str, Any]
ResourceLabels = Dict[str, str]
OperationResult = Dict[str, Any]
```