or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

batch-operations.md beta-features.md document-processing.md document-types.md index.md processor-management.md

docs/processor-management.md

0

# Processor Management

1

2

This guide covers comprehensive processor lifecycle management including creation, configuration, deployment, training, and monitoring of document processors.

3

4

## Processor Lifecycle Overview

5

6

Document processors follow this lifecycle:

7

1. **Create** - Initialize a new processor instance

8

2. **Configure** - Set up processor parameters and options

9

3. **Train** - Train custom processors (for custom types)

10

4. **Deploy** - Make processor versions available for processing

11

5. **Monitor** - Track performance and usage

12

6. **Update** - Deploy new versions and manage defaults

13

7. **Cleanup** - Disable and delete unused processors

14

15

## List and Discover Processors

16

17

### List Available Processors

18

19

```python { .api }

20

from google.cloud.documentai import DocumentProcessorServiceClient

21

from google.cloud.documentai.types import ListProcessorsRequest

22

23

def list_processors(project_id: str, location: str) -> list["Processor"]:
    """
    List all processors in a project location.

    Args:
        project_id: Google Cloud project ID
        location: Processor location (e.g., 'us', 'eu')

    Returns:
        list[Processor]: Every processor under the location, across all
            result pages
    """
    client = DocumentProcessorServiceClient()

    # Build parent path
    parent = client.common_location_path(project_id, location)

    # Create request
    request = ListProcessorsRequest(parent=parent)

    # list_processors returns a pager. Iterating the pager itself walks every
    # page; reading `.processors` would only expose the first page.
    pager = client.list_processors(request=request)

    return list(pager)

50

51

def display_processor_info(processors: list["Processor"]) -> None:
    """
    Display processor information in a readable format.

    Prints a header with the processor count, then one block per processor
    followed by a separator line.

    Args:
        processors: List of processor objects
    """
    print(f"Found {len(processors)} processors:")
    print("-" * 80)

    for processor in processors:
        print(f"Name: {processor.display_name}")
        # The processor ID is the last segment of the full resource name.
        print(f"ID: {processor.name.split('/')[-1]}")
        print(f"Type: {processor.type_}")
        print(f"State: {processor.state}")
        print(f"Default Version: {processor.default_processor_version}")
        print(f"Created: {processor.create_time}")
        print("-" * 80)

69

```

70

71

### Fetch Processor Types

72

73

```python { .api }

74

from google.cloud.documentai import DocumentProcessorServiceClient

75

from google.cloud.documentai.types import FetchProcessorTypesRequest

76

77

def fetch_processor_types(project_id: str, location: str) -> list["ProcessorType"]:
    """
    Fetch available processor types for a location.

    Args:
        project_id: Google Cloud project ID
        location: Processor location

    Returns:
        list[ProcessorType]: Available processor types
    """
    client = DocumentProcessorServiceClient()

    # Build parent path
    parent = client.common_location_path(project_id, location)

    # Create request
    request = FetchProcessorTypesRequest(parent=parent)

    # Fetch processor types
    response = client.fetch_processor_types(request=request)

    # Copy the repeated field into a plain list for the caller.
    return list(response.processor_types)

104

```

105

106

### List Processor Types

107

108

```python { .api }

109

from google.cloud.documentai import DocumentProcessorServiceClient

110

from google.cloud.documentai.types import ListProcessorTypesRequest

111

112

def list_processor_types(project_id: str, location: str) -> list["ProcessorType"]:
    """
    List available processor types for creation.

    Args:
        project_id: Google Cloud project ID
        location: Processor location

    Returns:
        list[ProcessorType]: Available processor types, across all result
            pages
    """
    client = DocumentProcessorServiceClient()

    # Build parent path
    parent = client.common_location_path(project_id, location)

    # Create request
    request = ListProcessorTypesRequest(parent=parent)

    # list_processor_types returns a pager. Iterating the pager fetches
    # every page; `.processor_types` would only expose the first page.
    pager = client.list_processor_types(request=request)

    return list(pager)

139

140

def display_processor_types(processor_types: list["ProcessorType"]) -> None:
    """
    Display available processor types.

    Prints a header with the total count, then the types grouped by
    category in first-seen order.

    Args:
        processor_types: List of ProcessorType objects
    """
    print(f"Available processor types ({len(processor_types)}):")
    print("-" * 60)

    # Group by category for better display
    categories = {}
    for proc_type in processor_types:
        categories.setdefault(proc_type.category, []).append(proc_type)

    for category, types in categories.items():
        print(f"\n{category}:")
        for proc_type in types:
            print(f" - {proc_type.display_name}")
            print(f" Type: {proc_type.type_}")
            if proc_type.allow_creation:
                print(" ✓ Available for creation")
            print()

166

```

167

168

### Get Specific Processor

169

170

```python { .api }

171

from google.cloud.documentai import DocumentProcessorServiceClient

172

from google.cloud.documentai.types import GetProcessorRequest

173

174

def get_processor(project_id: str, location: str, processor_id: str) -> "Processor":
    """
    Get details of a specific processor.

    Args:
        project_id: Google Cloud project ID
        location: Processor location
        processor_id: Processor ID

    Returns:
        Processor: Processor details
    """
    client = DocumentProcessorServiceClient()

    # Build processor name
    name = client.processor_path(project_id, location, processor_id)

    # Create request
    request = GetProcessorRequest(name=name)

    # Get processor
    return client.get_processor(request=request)

198

```

199

200

## Create Processors

201

202

### Create New Processor

203

204

```python { .api }

205

from google.cloud.documentai import DocumentProcessorServiceClient

206

from google.cloud.documentai.types import CreateProcessorRequest, Processor

207

208

def create_processor(
    project_id: str,
    location: str,
    display_name: str,
    processor_type: str
) -> "Processor":
    """
    Create a new document processor.

    Prints the display name and ID of the processor once it is created.

    Args:
        project_id: Google Cloud project ID
        location: Processor location
        display_name: Human-readable name for the processor
        processor_type: Type of processor to create (e.g., 'OCR_PROCESSOR')

    Returns:
        Processor: Created processor instance
    """
    client = DocumentProcessorServiceClient()

    # Build parent path
    parent = client.common_location_path(project_id, location)

    # Create processor object
    processor = Processor(
        display_name=display_name,
        type_=processor_type,
    )

    # Create request
    request = CreateProcessorRequest(
        parent=parent,
        processor=processor,
    )

    # Create processor
    created_processor = client.create_processor(request=request)

    print(f"Created processor: {created_processor.display_name}")
    # The processor ID is the last segment of the full resource name.
    print(f"Processor ID: {created_processor.name.split('/')[-1]}")

    return created_processor

250

251

def create_common_processors(project_id: str, location: str) -> dict[str, "Processor"]:
    """
    Create commonly used processors.

    Creation is best-effort: a failure for one type is printed and the
    remaining types are still attempted.

    Args:
        project_id: Google Cloud project ID
        location: Processor location

    Returns:
        dict[str, Processor]: Created processors by type; types whose
            creation failed are absent
    """
    processors = {}

    # Common processor types
    common_types = [
        ("OCR_PROCESSOR", "General OCR Processor"),
        ("FORM_PARSER_PROCESSOR", "Form Parser"),
        ("INVOICE_PROCESSOR", "Invoice Processor"),
        ("RECEIPT_PROCESSOR", "Receipt Processor"),
    ]

    for processor_type, display_name in common_types:
        try:
            processor = create_processor(
                project_id=project_id,
                location=location,
                display_name=display_name,
                processor_type=processor_type,
            )
            processors[processor_type] = processor
        except Exception as e:
            # Deliberately broad: report the failure and keep going so one
            # unavailable type does not block the others.
            print(f"Failed to create {processor_type}: {e}")

    return processors

285

```

286

287

## Processor State Management

288

289

### Enable/Disable Processors

290

291

```python { .api }

292

from google.cloud.documentai import DocumentProcessorServiceClient

293

from google.cloud.documentai.types import (

294

EnableProcessorRequest,

295

DisableProcessorRequest

296

)

297

298

def enable_processor(project_id: str, location: str, processor_id: str) -> "EnableProcessorResponse":
    """
    Enable a disabled processor.

    Blocks until the long-running enable operation finishes.

    Args:
        project_id: Google Cloud project ID
        location: Processor location
        processor_id: Processor ID to enable

    Returns:
        EnableProcessorResponse: Operation response
    """
    client = DocumentProcessorServiceClient()

    # Build processor name
    name = client.processor_path(project_id, location, processor_id)

    # Create request
    request = EnableProcessorRequest(name=name)

    # Enable processor (this is a long-running operation)
    operation = client.enable_processor(request=request)

    print(f"Enabling processor {processor_id}...")

    # Wait for operation to complete
    response = operation.result()

    print(f"Processor {processor_id} enabled successfully")
    return response

328

329

def disable_processor(project_id: str, location: str, processor_id: str) -> "DisableProcessorResponse":
    """
    Disable an active processor.

    Blocks until the long-running disable operation finishes.

    Args:
        project_id: Google Cloud project ID
        location: Processor location
        processor_id: Processor ID to disable

    Returns:
        DisableProcessorResponse: Operation response
    """
    client = DocumentProcessorServiceClient()

    # Build processor name
    name = client.processor_path(project_id, location, processor_id)

    # Create request
    request = DisableProcessorRequest(name=name)

    # Disable processor (this is a long-running operation)
    operation = client.disable_processor(request=request)

    print(f"Disabling processor {processor_id}...")

    # Wait for operation to complete
    response = operation.result()

    print(f"Processor {processor_id} disabled successfully")
    return response

359

```

360

361

### Delete Processors

362

363

```python { .api }

364

from google.cloud.documentai import DocumentProcessorServiceClient

365

from google.cloud.documentai.types import DeleteProcessorRequest

366

367

def delete_processor(project_id: str, location: str, processor_id: str) -> None:
    """
    Delete a processor permanently.

    If the processor is currently enabled it is disabled first. A failure
    during that pre-check is printed as a warning and the delete is still
    attempted.

    Args:
        project_id: Google Cloud project ID
        location: Processor location
        processor_id: Processor ID to delete

    Note:
        This operation is irreversible. Ensure the processor is disabled first.
    """
    # Imported here so this snippet does not depend on the imports of the
    # other snippets in this guide.
    from google.cloud.documentai.types import Processor

    client = DocumentProcessorServiceClient()

    # Build processor name
    name = client.processor_path(project_id, location, processor_id)

    # First, ensure processor is disabled
    try:
        processor = get_processor(project_id, location, processor_id)
        # `state` is a Processor.State enum value; comparing it to the
        # string "ENABLED" would never match.
        if processor.state == Processor.State.ENABLED:
            print("Processor is enabled. Disabling first...")
            disable_processor(project_id, location, processor_id)
    except Exception as e:
        # Best-effort pre-check: report and fall through to the delete.
        print(f"Warning: Could not check processor state: {e}")

    # Create delete request
    request = DeleteProcessorRequest(name=name)

    # Delete processor (this is a long-running operation)
    operation = client.delete_processor(request=request)

    print(f"Deleting processor {processor_id}...")

    # Wait for operation to complete
    operation.result()

    print(f"Processor {processor_id} deleted successfully")

405

```

406

407

## Processor Version Management

408

409

### List Processor Versions

410

411

```python { .api }

412

from google.cloud.documentai import DocumentProcessorServiceClient

413

from google.cloud.documentai.types import ListProcessorVersionsRequest

414

415

def list_processor_versions(
    project_id: str,
    location: str,
    processor_id: str
) -> list["ProcessorVersion"]:
    """
    List all versions of a processor.

    Args:
        project_id: Google Cloud project ID
        location: Processor location
        processor_id: Processor ID

    Returns:
        list[ProcessorVersion]: List of processor versions, across all
            result pages
    """
    client = DocumentProcessorServiceClient()

    # Build processor path as parent
    parent = client.processor_path(project_id, location, processor_id)

    # Create request
    request = ListProcessorVersionsRequest(parent=parent)

    # list_processor_versions returns a pager. Iterating the pager fetches
    # every page; `.processor_versions` would only expose the first page.
    pager = client.list_processor_versions(request=request)

    return list(pager)

447

448

def display_processor_versions(versions: list["ProcessorVersion"]) -> None:
    """
    Display processor version information.

    Prints a header with the version count, then one block per version
    followed by a separator line. Model type and latest evaluation are
    printed only when they are set.

    Args:
        versions: List of ProcessorVersion objects
    """
    print(f"Found {len(versions)} processor versions:")
    print("-" * 70)

    for version in versions:
        # The version ID is the last segment of the full resource name.
        version_id = version.name.split('/')[-1]
        print(f"Version ID: {version_id}")
        print(f"Display Name: {version.display_name}")
        print(f"State: {version.state}")
        print(f"Created: {version.create_time}")

        if version.model_type:
            print(f"Model Type: {version.model_type}")

        if version.latest_evaluation:
            print(f"Latest Evaluation: {version.latest_evaluation}")

        print("-" * 70)

472

```

473

474

### Deploy Processor Versions

475

476

```python { .api }

477

from google.cloud.documentai import DocumentProcessorServiceClient

478

from google.cloud.documentai.types import DeployProcessorVersionRequest

479

480

def deploy_processor_version(
    project_id: str,
    location: str,
    processor_id: str,
    version_id: str
) -> "DeployProcessorVersionResponse":
    """
    Deploy a processor version for serving.

    Blocks until the long-running deploy operation finishes.

    Args:
        project_id: Google Cloud project ID
        location: Processor location
        processor_id: Processor ID
        version_id: Version ID to deploy

    Returns:
        DeployProcessorVersionResponse: Deployment response
    """
    client = DocumentProcessorServiceClient()

    # Build processor version name
    name = client.processor_version_path(
        project_id, location, processor_id, version_id
    )

    # Create request
    request = DeployProcessorVersionRequest(name=name)

    # Deploy version (this is a long-running operation)
    operation = client.deploy_processor_version(request=request)

    print(f"Deploying processor version {version_id}...")

    # Wait for operation to complete
    response = operation.result()

    print(f"Processor version {version_id} deployed successfully")
    return response

518

519

def undeploy_processor_version(
    project_id: str,
    location: str,
    processor_id: str,
    version_id: str
) -> "UndeployProcessorVersionResponse":
    """
    Undeploy a processor version from serving.

    Blocks until the long-running undeploy operation finishes.

    Args:
        project_id: Google Cloud project ID
        location: Processor location
        processor_id: Processor ID
        version_id: Version ID to undeploy

    Returns:
        UndeployProcessorVersionResponse: Undeploy response
    """
    # The snippet's module-level imports only bring in
    # DeployProcessorVersionRequest, so import the undeploy request here.
    from google.cloud.documentai.types import UndeployProcessorVersionRequest

    client = DocumentProcessorServiceClient()

    # Build processor version name
    name = client.processor_version_path(
        project_id, location, processor_id, version_id
    )

    # Create request
    request = UndeployProcessorVersionRequest(name=name)

    # Undeploy version (this is a long-running operation)
    operation = client.undeploy_processor_version(request=request)

    print(f"Undeploying processor version {version_id}...")

    # Wait for operation to complete
    response = operation.result()

    print(f"Processor version {version_id} undeployed successfully")
    return response

557

```

558

559

### Set Default Processor Version

560

561

```python { .api }

562

from google.cloud.documentai import DocumentProcessorServiceClient

563

from google.cloud.documentai.types import SetDefaultProcessorVersionRequest

564

565

def set_default_processor_version(
    project_id: str,
    location: str,
    processor_id: str,
    version_id: str
) -> "SetDefaultProcessorVersionResponse":
    """
    Set the default version for a processor.

    Blocks until the long-running operation finishes.

    Args:
        project_id: Google Cloud project ID
        location: Processor location
        processor_id: Processor ID
        version_id: Version ID to set as default

    Returns:
        SetDefaultProcessorVersionResponse: Response with updated processor
    """
    client = DocumentProcessorServiceClient()

    # Build processor path
    processor_name = client.processor_path(project_id, location, processor_id)

    # Build version path
    version_name = client.processor_version_path(
        project_id, location, processor_id, version_id
    )

    # Create request
    request = SetDefaultProcessorVersionRequest(
        processor=processor_name,
        default_processor_version=version_name,
    )

    # Set default version (this is a long-running operation)
    operation = client.set_default_processor_version(request=request)

    print(f"Setting default version to {version_id}...")

    # Wait for operation to complete
    response = operation.result()

    print(f"Default version set to {version_id} successfully")
    return response

609

```

610

611

## Custom Processor Training

612

613

### Train Processor Version

614

615

```python { .api }

616

from google.cloud.documentai import DocumentProcessorServiceClient

617

from google.cloud.documentai.types import (

618

TrainProcessorVersionRequest,

619

DocumentSchema

620

)

621

622

def train_processor_version(
    project_id: str,
    location: str,
    processor_id: str,
    version_display_name: str,
    training_dataset: str,
    test_dataset: "str | None" = None,
    document_schema: "DocumentSchema | None" = None
) -> "Operation":
    """
    Train a new version of a custom processor.

    Starts the training and returns immediately without waiting for it to
    finish. The operation name is printed so progress can be checked later.

    Args:
        project_id: Google Cloud project ID
        location: Processor location
        processor_id: Processor ID to train
        version_display_name: Display name for new version
        training_dataset: GCS URI prefix of the training dataset
        test_dataset: Optional GCS URI prefix of the test dataset
        document_schema: Optional document schema for training

    Returns:
        Operation: The long-running training operation. Call ``.result()``
            on it to block until training ends and obtain the
            TrainProcessorVersionResponse.
    """
    client = DocumentProcessorServiceClient()

    # Build processor path as parent
    parent = client.processor_path(project_id, location, processor_id)

    # Create processor version configuration
    processor_version = {
        "display_name": version_display_name,
    }

    # Add document schema if provided
    if document_schema:
        processor_version["document_schema"] = document_schema

    # Create training input configuration
    input_data = {
        "training_documents": {
            "gcs_prefix": {"gcs_uri_prefix": training_dataset},
        },
    }

    # Add test dataset if provided
    if test_dataset:
        input_data["test_documents"] = {
            "gcs_prefix": {"gcs_uri_prefix": test_dataset},
        }

    # Create request
    request = TrainProcessorVersionRequest(
        parent=parent,
        processor_version=processor_version,
        input_data=input_data,
    )

    # Start training (this is a long-running operation)
    operation = client.train_processor_version(request=request)

    print(f"Starting training for processor version: {version_display_name}")
    print("This operation may take several hours to complete...")

    # For production, you'd typically not wait for completion here
    # Instead, you'd check the operation status periodically
    print(f"Training operation name: {operation.operation.name}")

    return operation

691

692

def check_training_progress(operation_name: str) -> dict:
    """
    Check the progress of a training operation.

    Args:
        operation_name: Name of the training operation

    Returns:
        dict: Operation status and progress information with the keys
            "name", "done", "metadata", "result" and "error". Keys that do
            not apply to the operation's current state are None.
    """
    # Look the operation up through the Document AI client's operations
    # mixin. operations_v1.OperationsClient expects a gRPC channel, so it
    # cannot be constructed from credentials alone.
    client = DocumentProcessorServiceClient()

    # Get operation status
    operation = client.get_operation(request={"name": operation_name})

    status_info = {
        "name": operation.name,
        "done": operation.done,
        "metadata": None,
        "result": None,
        "error": None,
    }

    # Use HasField rather than truthiness to test whether a message-typed
    # field is actually set on the raw protobuf Operation.
    if operation.HasField("metadata"):
        # Metadata is passed through unparsed.
        status_info["metadata"] = operation.metadata

    if operation.done:
        if operation.HasField("error"):
            status_info["error"] = operation.error
        else:
            status_info["result"] = operation.response

    return status_info

731

```

732

733

## Processor Evaluation

734

735

### Evaluate Processor Performance

736

737

```python { .api }

738

from google.cloud.documentai import DocumentProcessorServiceClient

739

from google.cloud.documentai.types import (

740

EvaluateProcessorVersionRequest,

741

EvaluationReference

742

)

743

744

def evaluate_processor_version(
    project_id: str,
    location: str,
    processor_id: str,
    version_id: str,
    evaluation_documents: str
) -> "EvaluateProcessorVersionResponse":
    """
    Evaluate the performance of a processor version.

    Blocks until the long-running evaluation operation finishes.

    Args:
        project_id: Google Cloud project ID
        location: Processor location
        processor_id: Processor ID
        version_id: Version ID to evaluate
        evaluation_documents: GCS path to evaluation documents

    Returns:
        EvaluateProcessorVersionResponse: Evaluation response
    """
    client = DocumentProcessorServiceClient()

    # Build processor version name
    processor_version = client.processor_version_path(
        project_id, location, processor_id, version_id
    )

    # Create evaluation documents configuration
    evaluation_documents_config = {
        "gcs_prefix": {"gcs_uri_prefix": evaluation_documents},
    }

    # Create request
    request = EvaluateProcessorVersionRequest(
        processor_version=processor_version,
        evaluation_documents=evaluation_documents_config,
    )

    # Start evaluation (this is a long-running operation)
    operation = client.evaluate_processor_version(request=request)

    print(f"Starting evaluation for processor version {version_id}...")

    # Wait for evaluation to complete
    response = operation.result()

    print("Evaluation completed successfully")
    return response

792

793

def list_evaluations(
    project_id: str,
    location: str,
    processor_id: str,
    version_id: str
) -> list["Evaluation"]:
    """
    List all evaluations for a processor version.

    Args:
        project_id: Google Cloud project ID
        location: Processor location
        processor_id: Processor ID
        version_id: Version ID

    Returns:
        list[Evaluation]: List of evaluation results, across all result
            pages
    """
    # The snippet's module-level imports do not include this request type.
    from google.cloud.documentai.types import ListEvaluationsRequest

    client = DocumentProcessorServiceClient()

    # Build processor version path as parent
    parent = client.processor_version_path(
        project_id, location, processor_id, version_id
    )

    # Create request
    request = ListEvaluationsRequest(parent=parent)

    # list_evaluations returns a pager. Iterating the pager fetches every
    # page; `.evaluations` would only expose the first page.
    pager = client.list_evaluations(request=request)

    return list(pager)

829

830

def get_evaluation_details(
    project_id: str,
    location: str,
    processor_id: str,
    version_id: str,
    evaluation_id: str
) -> "Evaluation":
    """
    Get detailed evaluation results.

    Args:
        project_id: Google Cloud project ID
        location: Processor location
        processor_id: Processor ID
        version_id: Version ID
        evaluation_id: Evaluation ID

    Returns:
        Evaluation: Detailed evaluation results
    """
    # The snippet's module-level imports do not include this request type.
    from google.cloud.documentai.types import GetEvaluationRequest

    client = DocumentProcessorServiceClient()

    # Build evaluation name
    name = client.evaluation_path(
        project_id, location, processor_id, version_id, evaluation_id
    )

    # Create request
    request = GetEvaluationRequest(name=name)

    # Get evaluation
    return client.get_evaluation(request=request)

864

```

865

866

## Complete Processor Management Example

867

868

```python { .api }

869

def complete_processor_management_example():
    """
    Complete example demonstrating processor lifecycle management.

    Lists existing processors, creates and enables an invoice processor,
    lists its versions and prints its details. Uses the helper functions
    defined in the earlier sections of this guide.
    """
    project_id = "my-project"
    location = "us"

    # 1. List existing processors
    print("=== LISTING PROCESSORS ===")
    processors = list_processors(project_id, location)
    display_processor_info(processors)

    # 2. Create a new processor if needed
    print("\n=== CREATING PROCESSOR ===")
    processor = create_processor(
        project_id=project_id,
        location=location,
        display_name="My Custom Invoice Processor",
        processor_type="INVOICE_PROCESSOR",
    )
    # The processor ID is the last segment of the full resource name.
    processor_id = processor.name.split('/')[-1]

    # 3. Enable the processor
    print("\n=== ENABLING PROCESSOR ===")
    enable_processor(project_id, location, processor_id)

    # 4. List processor versions
    print("\n=== LISTING VERSIONS ===")
    versions = list_processor_versions(project_id, location, processor_id)
    display_processor_versions(versions)

    # 5. Get processor details
    print("\n=== PROCESSOR DETAILS ===")
    processor_details = get_processor(project_id, location, processor_id)
    print(f"Processor State: {processor_details.state}")
    print(f"Default Version: {processor_details.default_processor_version}")

    # 6. Evaluate processor (if evaluation data available)
    # evaluation_gcs_path = "gs://my-bucket/evaluation-docs/"
    # evaluation = evaluate_processor_version(
    #     project_id, location, processor_id, version_id, evaluation_gcs_path
    # )

    print("\nProcessor management example completed!")

915

916

# Run the end-to-end example only when executed as a script.
if __name__ == "__main__":
    complete_processor_management_example()

918

```

919

920

This comprehensive guide covers all aspects of processor management in Google Cloud Document AI, from basic operations to advanced training and evaluation workflows.