# Advanced Spectral Library Operations

Extended spectral library functionality including decoy generation, format conversion, library validation, and specialized library formats. Provides comprehensive tools for spectral library manipulation, quality control, and integration with various proteomics workflows and search engines.

## Capabilities

### Decoy Generation and Management

Comprehensive decoy generation capabilities supporting multiple strategies and integration with target-decoy search workflows.

```python { .api }
class SpecLibDecoy:
    """Extended spectral library with integrated decoy generation and management."""

    def __init__(self, target_lib: SpecLibBase = None):
        """
        Initialize spectral library with decoy capabilities.

        Parameters:
        - target_lib: Target spectral library to extend with decoys
        """

    def generate_decoys(self, method: str = 'diann',
                        decoy_prefix: str = 'DECOY_',
                        keep_peptide_types: bool = True) -> None:
        """
        Generate decoy sequences using specified method.

        Parameters:
        - method: Decoy generation method ('diann', 'pseudo_reverse', 'shuffle')
        - decoy_prefix: Prefix for decoy protein identifiers
        - keep_peptide_types: Preserve peptide characteristics in decoys
        """

    def validate_decoy_quality(self) -> dict:
        """
        Assess quality of generated decoy sequences.

        Returns:
        Dictionary with decoy quality metrics and statistics
        """

    def get_target_decoy_ratio(self) -> float:
        """
        Calculate ratio of target to decoy sequences.

        Returns:
        Target-to-decoy ratio
        """

    def separate_targets_and_decoys(self) -> tuple['SpecLibBase', 'SpecLibBase']:
        """
        Split library into separate target and decoy libraries.

        Returns:
        Tuple of (target_library, decoy_library)
        """

class DIANNDecoyGenerator:
    """DIANN-style decoy generation with advanced sequence manipulation."""

    def __init__(self, keep_peptide_types: bool = True,
                 min_peptide_length: int = 6,
                 max_peptide_length: int = 30):
        """
        Initialize DIANN decoy generator.

        Parameters:
        - keep_peptide_types: Preserve tryptic characteristics
        - min_peptide_length: Minimum length for generated decoys
        - max_peptide_length: Maximum length for generated decoys
        """

    def generate_decoy_sequence(self, target_sequence: str,
                                target_proteins: str) -> tuple[str, str]:
        """
        Generate single decoy sequence from target.

        Parameters:
        - target_sequence: Target peptide sequence
        - target_proteins: Target protein identifiers

        Returns:
        Tuple of (decoy_sequence, decoy_proteins)
        """

    def generate_decoy_library(self, target_lib: SpecLibBase,
                               decoy_prefix: str = 'DECOY_') -> SpecLibBase:
        """
        Generate complete decoy library from target library.

        Parameters:
        - target_lib: Target spectral library
        - decoy_prefix: Prefix for decoy identifiers

        Returns:
        New spectral library with decoy sequences
        """

    def validate_sequence_properties(self, target_seq: str,
                                     decoy_seq: str) -> dict:
        """
        Compare properties between target and decoy sequences.

        Parameters:
        - target_seq: Original target sequence
        - decoy_seq: Generated decoy sequence

        Returns:
        Dictionary with property comparisons
        """

class PseudoReverseDecoyGenerator:
    """Pseudo-reverse decoy generation with tryptic preservation."""

    def __init__(self, cleavage_rule: str = 'trypsin'):
        """
        Initialize pseudo-reverse generator.

        Parameters:
        - cleavage_rule: Enzyme cleavage specificity to preserve
        """

    def generate_pseudo_reverse(self, sequence: str) -> str:
        """
        Generate pseudo-reverse sequence preserving cleavage sites.

        Parameters:
        - sequence: Target peptide sequence

        Returns:
        Pseudo-reverse decoy sequence
        """

    def preserve_cleavage_specificity(self, sequence: str,
                                      enzyme: str = 'trypsin') -> str:
        """
        Ensure decoy maintains enzymatic cleavage characteristics.

        Parameters:
        - sequence: Input sequence
        - enzyme: Enzyme specificity to preserve

        Returns:
        Modified sequence with preserved cleavage sites
        """

class BaseDecoyGenerator:
    """Base class for custom decoy generation strategies."""

    def __init__(self):
        """Initialize base decoy generator."""

    def generate_decoy(self, target_sequence: str,
                       target_proteins: str,
                       **kwargs) -> tuple[str, str]:
        """
        Generate decoy sequence (to be implemented by subclasses).

        Parameters:
        - target_sequence: Target peptide sequence
        - target_proteins: Target protein identifiers
        - **kwargs: Strategy-specific parameters

        Returns:
        Tuple of (decoy_sequence, decoy_proteins)
        """
        raise NotImplementedError("Subclasses must implement generate_decoy")

    def validate_decoy(self, target_seq: str, decoy_seq: str) -> bool:
        """
        Validate generated decoy sequence.

        Parameters:
        - target_seq: Original target sequence
        - decoy_seq: Generated decoy sequence

        Returns:
        True if decoy passes validation checks
        """
        return True

class SpecLibDecoyProvider:
    """Provider system for decoy generation strategies."""

    @staticmethod
    def get_generator(method: str, **kwargs) -> BaseDecoyGenerator:
        """
        Get decoy generator instance by method name.

        Parameters:
        - method: Generator method ('diann', 'pseudo_reverse', 'shuffle')
        - **kwargs: Method-specific parameters

        Returns:
        Configured decoy generator instance
        """

    @staticmethod
    def list_available_methods() -> List[str]:
        """
        List all available decoy generation methods.

        Returns:
        List of method names
        """

    @staticmethod
    def register_custom_generator(name: str,
                                  generator_class: type) -> None:
        """
        Register custom decoy generation method.

        Parameters:
        - name: Name for the custom method
        - generator_class: Class implementing BaseDecoyGenerator
        """
```

### Flat Spectral Library Format

Specialized flat format for efficient storage and retrieval of large spectral libraries.

```python { .api }
class SpecLibFlat:
    """Flat spectral library format optimized for large-scale storage."""

    def __init__(self):
        """Initialize flat spectral library."""

    def from_spec_lib(self, spec_lib: SpecLibBase) -> None:
        """
        Convert standard spectral library to flat format.

        Parameters:
        - spec_lib: Standard SpecLibBase to convert
        """

    def to_spec_lib(self) -> SpecLibBase:
        """
        Convert flat library back to standard format.

        Returns:
        Standard SpecLibBase instance
        """

    def save_flat(self, filepath: str,
                  compression: str = 'gzip') -> None:
        """
        Save flat library to compressed file.

        Parameters:
        - filepath: Output file path
        - compression: Compression method ('gzip', 'bz2', 'xz')
        """

    def load_flat(self, filepath: str) -> None:
        """
        Load flat library from compressed file.

        Parameters:
        - filepath: Input file path
        """

    def get_precursor_range(self, start_idx: int,
                            end_idx: int) -> pd.DataFrame:
        """
        Get precursor range without loading full library.

        Parameters:
        - start_idx: Starting precursor index
        - end_idx: Ending precursor index

        Returns:
        DataFrame with precursor range
        """

    def query_by_mz_range(self, min_mz: float,
                          max_mz: float) -> pd.DataFrame:
        """
        Query precursors by m/z range efficiently.

        Parameters:
        - min_mz: Minimum m/z value
        - max_mz: Maximum m/z value

        Returns:
        DataFrame with precursors in m/z range
        """

    def create_index(self, index_type: str = 'mz') -> None:
        """
        Create optimized index for fast queries.

        Parameters:
        - index_type: Type of index ('mz', 'rt', 'sequence')
        """

    def optimize_storage(self) -> dict:
        """
        Optimize storage layout and compression.

        Returns:
        Dictionary with optimization statistics
        """
```

### Library Readers and Format Conversion

Comprehensive readers for various spectral library formats and conversion utilities.

```python { .api }
class LibraryReaderBase:
    """Base class for spectral library format readers."""

    def __init__(self):
        """Initialize library reader."""

    def read_library(self, filepath: str, **kwargs) -> SpecLibBase:
        """
        Read spectral library from file.

        Parameters:
        - filepath: Path to library file
        - **kwargs: Format-specific options

        Returns:
        Loaded spectral library
        """
        raise NotImplementedError("Subclasses must implement read_library")

    def validate_format(self, filepath: str) -> bool:
        """
        Validate if file matches expected format.

        Parameters:
        - filepath: File path to validate

        Returns:
        True if format is compatible
        """
        return True

    def get_library_info(self, filepath: str) -> dict:
        """
        Get library metadata without full loading.

        Parameters:
        - filepath: Library file path

        Returns:
        Dictionary with library information
        """
        return {}

class CSVLibraryReader(LibraryReaderBase):
    """Reader for CSV-format spectral libraries."""

    def __init__(self, delimiter: str = ','):
        """
        Initialize CSV reader.

        Parameters:
        - delimiter: CSV delimiter character
        """

    def read_library(self, filepath: str, **kwargs) -> SpecLibBase:
        """
        Read spectral library from CSV file.

        Parameters:
        - filepath: Path to CSV library file
        - **kwargs: CSV reading options

        Returns:
        Loaded spectral library
        """

    def set_column_mapping(self, mapping: dict) -> None:
        """
        Set custom column name mappings.

        Parameters:
        - mapping: Dictionary mapping CSV columns to standard names
        """

class TSVLibraryReader(LibraryReaderBase):
    """Reader for TSV-format spectral libraries."""

    def __init__(self):
        """Initialize TSV reader."""

    def read_library(self, filepath: str, **kwargs) -> SpecLibBase:
        """
        Read spectral library from TSV file.

        Parameters:
        - filepath: Path to TSV library file
        - **kwargs: TSV reading options

        Returns:
        Loaded spectral library
        """

class MSPLibraryReader(LibraryReaderBase):
    """Reader for MSP-format spectral libraries."""

    def __init__(self):
        """Initialize MSP reader."""

    def read_library(self, filepath: str, **kwargs) -> SpecLibBase:
        """
        Read spectral library from MSP file.

        Parameters:
        - filepath: Path to MSP library file
        - **kwargs: MSP reading options

        Returns:
        Loaded spectral library
        """

    def parse_msp_entry(self, entry_text: str) -> dict:
        """
        Parse individual MSP library entry.

        Parameters:
        - entry_text: Raw MSP entry text

        Returns:
        Dictionary with parsed entry information
        """

def get_library_reader(filepath: str) -> LibraryReaderBase:
    """
    Auto-detect and return appropriate library reader.

    Parameters:
    - filepath: Path to library file

    Returns:
    Appropriate reader instance for the file format
    """

def convert_library_format(input_path: str,
                           output_path: str,
                           input_format: str = None,
                           output_format: str = 'hdf5') -> None:
    """
    Convert spectral library between formats.

    Parameters:
    - input_path: Input library file path
    - output_path: Output library file path
    - input_format: Input format (auto-detected if None)
    - output_format: Output format ('hdf5', 'csv', 'msp')
    """
```

### Library Translation and Format Support

Utilities for translating between different spectral library formats and search engine requirements.

```python { .api }
class WritingProcess:
    """Multiprocessing writer for efficient library export."""

    def __init__(self, n_processes: int = 4):
        """
        Initialize multiprocessing writer.

        Parameters:
        - n_processes: Number of worker processes
        """

    def write_library_parallel(self, spec_lib: SpecLibBase,
                               output_path: str,
                               format_type: str = 'tsv',
                               chunk_size: int = 10000) -> None:
        """
        Write library using parallel processing.

        Parameters:
        - spec_lib: Spectral library to write
        - output_path: Output file path
        - format_type: Output format
        - chunk_size: Number of precursors per chunk
        """

    def write_multiple_formats(self, spec_lib: SpecLibBase,
                               base_path: str,
                               formats: List[str]) -> dict:
        """
        Write library in multiple formats simultaneously.

        Parameters:
        - spec_lib: Spectral library to write
        - base_path: Base output path (extensions added automatically)
        - formats: List of output formats

        Returns:
        Dictionary mapping formats to output file paths
        """

def translate_to_diann_format(spec_lib: SpecLibBase,
                              output_path: str) -> None:
    """
    Translate library to DIA-NN compatible format.

    Parameters:
    - spec_lib: Input spectral library
    - output_path: Output file path for DIA-NN library
    """

def translate_to_spectronaut_format(spec_lib: SpecLibBase,
                                    output_path: str) -> None:
    """
    Translate library to Spectronaut compatible format.

    Parameters:
    - spec_lib: Input spectral library
    - output_path: Output file path for Spectronaut library
    """

def translate_to_openswath_format(spec_lib: SpecLibBase,
                                  output_path: str) -> None:
    """
    Translate library to OpenSWATH compatible format.

    Parameters:
    - spec_lib: Input spectral library
    - output_path: Output file path for OpenSWATH library
    """

def translate_to_skyline_format(spec_lib: SpecLibBase,
                                output_path: str) -> None:
    """
    Translate library to Skyline compatible format.

    Parameters:
    - spec_lib: Input spectral library
    - output_path: Output file path for Skyline library
    """

def create_search_engine_libraries(spec_lib: SpecLibBase,
                                   output_dir: str,
                                   engines: List[str] = None) -> dict:
    """
    Create libraries for multiple search engines.

    Parameters:
    - spec_lib: Input spectral library
    - output_dir: Directory for output files
    - engines: List of search engines ('diann', 'spectronaut', 'openswath')

    Returns:
    Dictionary mapping engines to output file paths
    """
```

### Library Validation and Quality Control

Comprehensive validation system for assessing spectral library quality and completeness.

```python { .api }
class Schema:
    """Schema validation system for spectral libraries."""

    def __init__(self, required_columns: List[str] = None,
                 optional_columns: List[str] = None):
        """
        Initialize schema validator.

        Parameters:
        - required_columns: List of required column names
        - optional_columns: List of optional column names
        """

    def validate_library(self, spec_lib: SpecLibBase) -> dict:
        """
        Validate spectral library against schema.

        Parameters:
        - spec_lib: Spectral library to validate

        Returns:
        Dictionary with validation results and issues
        """

    def add_column_requirement(self, column: str,
                               requirement_type: str,
                               **kwargs) -> None:
        """
        Add column validation requirement.

        Parameters:
        - column: Column name
        - requirement_type: Type of requirement ('required', 'optional', 'forbidden')
        - **kwargs: Additional requirement parameters
        """

class Required:
    """Required column specification for schema validation."""

    def __init__(self, column_name: str,
                 data_type: type = None,
                 validation_func: callable = None):
        """
        Define required column.

        Parameters:
        - column_name: Name of required column
        - data_type: Expected data type
        - validation_func: Custom validation function
        """

    def validate(self, df: pd.DataFrame) -> dict:
        """
        Validate column presence and properties.

        Parameters:
        - df: DataFrame to validate

        Returns:
        Validation result dictionary
        """

class Optional:
    """Optional column specification for schema validation."""

    def __init__(self, column_name: str,
                 data_type: type = None,
                 default_value=None):
        """
        Define optional column.

        Parameters:
        - column_name: Name of optional column
        - data_type: Expected data type if present
        - default_value: Default value if column missing
        """

    def validate(self, df: pd.DataFrame) -> dict:
        """
        Validate optional column if present.

        Parameters:
        - df: DataFrame to validate

        Returns:
        Validation result dictionary
        """

class Column:
    """Generic column specification with flexible validation."""

    def __init__(self, name: str,
                 required: bool = True,
                 data_type: type = None,
                 min_value=None,
                 max_value=None,
                 allowed_values: List = None):
        """
        Define column specification.

        Parameters:
        - name: Column name
        - required: Whether column is required
        - data_type: Expected data type
        - min_value: Minimum allowed value
        - max_value: Maximum allowed value
        - allowed_values: List of allowed values
        """

    def validate(self, df: pd.DataFrame) -> dict:
        """
        Perform comprehensive column validation.

        Parameters:
        - df: DataFrame to validate

        Returns:
        Detailed validation results
        """

def validate_spectral_library_completeness(spec_lib: SpecLibBase) -> dict:
    """
    Validate spectral library completeness and consistency.

    Parameters:
    - spec_lib: Spectral library to validate

    Returns:
    Dictionary with completeness assessment
    """

def assess_library_quality_metrics(spec_lib: SpecLibBase) -> dict:
    """
    Calculate comprehensive library quality metrics.

    Parameters:
    - spec_lib: Spectral library to assess

    Returns:
    Dictionary with quality metrics and statistics
    """

def check_library_integrity(spec_lib: SpecLibBase) -> dict:
    """
    Check spectral library data integrity.

    Parameters:
    - spec_lib: Spectral library to check

    Returns:
    Dictionary with integrity check results
    """

def generate_library_report(spec_lib: SpecLibBase,
                            output_path: str = None) -> dict:
    """
    Generate comprehensive library quality report.

    Parameters:
    - spec_lib: Spectral library to analyze
    - output_path: Optional path to save HTML report

    Returns:
    Dictionary with report data and statistics
    """
```

## Usage Examples

### Decoy Generation and Management

```python
from alphabase.spectral_library.decoy import SpecLibDecoy, DIANNDecoyGenerator
from alphabase.spectral_library.base import SpecLibBase
import pandas as pd

# Create target library
target_lib = SpecLibBase()
target_lib.precursor_df = pd.DataFrame({
    'sequence': ['PEPTIDE', 'SEQUENCE', 'EXAMPLE'],
    'mods': ['', 'Phospho (STY)@2', ''],
    'charge': [2, 3, 2],
    'proteins': ['P12345', 'P67890', 'P11111']
})
target_lib.refine_df()

# Create decoy library using DIANN method
decoy_lib = SpecLibDecoy(target_lib)
decoy_lib.generate_decoys(method='diann', decoy_prefix='DECOY_')

print(f"Target precursors: {len(target_lib.precursor_df)}")
print(f"Total with decoys: {len(decoy_lib.precursor_df)}")
print(f"Target-decoy ratio: {decoy_lib.get_target_decoy_ratio():.1f}")

# Validate decoy quality
quality_metrics = decoy_lib.validate_decoy_quality()
print(f"Decoy quality metrics: {quality_metrics}")

# Separate targets and decoys
targets, decoys = decoy_lib.separate_targets_and_decoys()
print(f"Separated: {len(targets.precursor_df)} targets, {len(decoys.precursor_df)} decoys")
```

### Advanced Decoy Generation

```python
from alphabase.spectral_library.decoy import (
    DIANNDecoyGenerator, PseudoReverseDecoyGenerator, SpecLibDecoyProvider
)

# Use DIANN decoy generator directly
diann_gen = DIANNDecoyGenerator(keep_peptide_types=True)
target_seq = "PEPTIDE"
decoy_seq, decoy_proteins = diann_gen.generate_decoy_sequence(
    target_seq, "P12345"
)
print(f"DIANN decoy: {target_seq} -> {decoy_seq}")

# Validate sequence properties
properties = diann_gen.validate_sequence_properties(target_seq, decoy_seq)
print(f"Property comparison: {properties}")

# Use pseudo-reverse generator
pseudo_gen = PseudoReverseDecoyGenerator(cleavage_rule='trypsin')
pseudo_decoy = pseudo_gen.generate_pseudo_reverse(target_seq)
print(f"Pseudo-reverse decoy: {target_seq} -> {pseudo_decoy}")

# Use provider system
generator = SpecLibDecoyProvider.get_generator('diann', keep_peptide_types=True)
print(f"Available methods: {SpecLibDecoyProvider.list_available_methods()}")
```
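
The provider system also supports plugging in custom strategies. The sketch below relies only on the `BaseDecoyGenerator` interface and `SpecLibDecoyProvider.register_custom_generator` documented above; the shuffling logic itself is illustrative, not a built-in method.

```python
import random

from alphabase.spectral_library.decoy import BaseDecoyGenerator, SpecLibDecoyProvider

class ShuffleKeepTerminiDecoyGenerator(BaseDecoyGenerator):
    """Illustrative strategy: shuffle interior residues, keep both termini."""

    def generate_decoy(self, target_sequence: str,
                       target_proteins: str,
                       **kwargs) -> tuple[str, str]:
        # Keep first and last residues fixed so the C-terminal cleavage
        # residue (e.g. K/R for tryptic peptides) is preserved.
        interior = list(target_sequence[1:-1])
        random.shuffle(interior)
        decoy_sequence = target_sequence[0] + ''.join(interior) + target_sequence[-1]
        decoy_proteins = ';'.join(f"DECOY_{p}" for p in target_proteins.split(';'))
        return decoy_sequence, decoy_proteins

    def validate_decoy(self, target_seq: str, decoy_seq: str) -> bool:
        # Reject decoys identical to the target, which can happen for
        # short or low-complexity sequences.
        return decoy_seq != target_seq

# Register the strategy under a custom name and use it like the built-ins
SpecLibDecoyProvider.register_custom_generator(
    'shuffle_keep_termini', ShuffleKeepTerminiDecoyGenerator
)
custom_gen = SpecLibDecoyProvider.get_generator('shuffle_keep_termini')
decoy_seq, decoy_proteins = custom_gen.generate_decoy("ELVISLIVESK", "P99999")
print(f"Custom decoy: ELVISLIVESK -> {decoy_seq} ({decoy_proteins})")
```

Once registered, the custom name can be retrieved through `get_generator()` just like the built-in methods.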

### Flat Library Format Operations

```python
from alphabase.spectral_library.flat import SpecLibFlat

# Convert standard library to flat format
flat_lib = SpecLibFlat()
flat_lib.from_spec_lib(target_lib)

# Save in compressed format
flat_lib.save_flat('library_flat.gz', compression='gzip')

# Load flat library
new_flat = SpecLibFlat()
new_flat.load_flat('library_flat.gz')

# Efficient range queries
precursor_range = new_flat.get_precursor_range(0, 10)
print(f"First 10 precursors: {len(precursor_range)}")

# Query by m/z range
mz_range = new_flat.query_by_mz_range(400.0, 500.0)
print(f"Precursors in m/z 400-500: {len(mz_range)}")

# Create index for fast queries
new_flat.create_index(index_type='mz')

# Optimize storage
optimization_stats = new_flat.optimize_storage()
print(f"Storage optimization: {optimization_stats}")
```
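
A flat library can also be converted back to the standard representation with `to_spec_lib()` when downstream steps such as decoy generation or export expect `SpecLibBase`. A minimal sketch continuing from the objects above:

```python
# Convert back to the standard representation for downstream steps
restored_lib = new_flat.to_spec_lib()

# Simple roundtrip check: precursor counts before and after flattening
# (assumes the flat format preserves the precursor table one-to-one).
print(f"Original precursors: {len(target_lib.precursor_df)}")
print(f"Restored precursors: {len(restored_lib.precursor_df)}")
```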

### Library Format Conversion

```python
from alphabase.spectral_library.reader import (
    get_library_reader, convert_library_format
)
from alphabase.spectral_library.translate import (
    translate_to_diann_format, create_search_engine_libraries
)

# Auto-detect and read library format
reader = get_library_reader('unknown_library.tsv')
loaded_lib = reader.read_library('unknown_library.tsv')
print(f"Loaded library: {len(loaded_lib.precursor_df)} precursors")

# Convert between formats
convert_library_format(
    input_path='library.csv',
    output_path='library.h5',
    input_format='csv',
    output_format='hdf5'
)

# Translate to specific search engine formats
translate_to_diann_format(loaded_lib, 'library_diann.tsv')
print("Translated to DIA-NN format")

# Create libraries for multiple search engines
engine_libraries = create_search_engine_libraries(
    loaded_lib,
    output_dir='./libraries/',
    engines=['diann', 'spectronaut', 'openswath']
)
print(f"Created libraries: {list(engine_libraries.keys())}")
```
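
Format-specific readers can also be used directly when input files do not follow the standard column names. The sketch below uses only the reader API documented above; the vendor column names, file names, and MSP entry text are hypothetical.

```python
from alphabase.spectral_library.reader import CSVLibraryReader, MSPLibraryReader

# CSV reader with a custom column mapping (left: hypothetical vendor
# column names, right: standard library column names).
csv_reader = CSVLibraryReader(delimiter=',')
csv_reader.set_column_mapping({
    'PeptideSequence': 'sequence',
    'PrecursorCharge': 'charge',
    'ProteinId': 'proteins',
    'RetentionTime': 'rt',
})
if csv_reader.validate_format('vendor_library.csv'):
    info = csv_reader.get_library_info('vendor_library.csv')
    print(f"Library info: {info}")
    vendor_lib = csv_reader.read_library('vendor_library.csv')
    print(f"Loaded {len(vendor_lib.precursor_df)} precursors")

# MSP reader: individual entries can also be parsed from raw text
msp_reader = MSPLibraryReader()
entry = msp_reader.parse_msp_entry(
    "Name: PEPTIDE/2\nMW: 799.36\nNum peaks: 2\n200.1 1000\n300.2 500\n"
)
print(f"Parsed MSP entry: {entry}")
```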

### Library Validation and Quality Control

```python
from alphabase.spectral_library.validate import (
    Schema, Required, Optional, validate_spectral_library_completeness,
    assess_library_quality_metrics, generate_library_report
)

# Create validation schema
schema = Schema()
schema.add_column_requirement('sequence', 'required', data_type=str)
schema.add_column_requirement('charge', 'required', data_type=int)
schema.add_column_requirement('proteins', 'required', data_type=str)
schema.add_column_requirement('rt', 'optional', data_type=float)

# Validate library against schema
validation_results = schema.validate_library(loaded_lib)
print(f"Schema validation: {validation_results['passed']}")
if not validation_results['passed']:
    print(f"Issues: {validation_results['issues']}")

# Check library completeness
completeness = validate_spectral_library_completeness(loaded_lib)
print("Library completeness:")
print(f"  Precursor completeness: {completeness['precursor_completeness']:.1%}")
print(f"  Fragment completeness: {completeness['fragment_completeness']:.1%}")

# Assess quality metrics
quality_metrics = assess_library_quality_metrics(loaded_lib)
print("Quality metrics:")
print(f"  Average fragments per precursor: {quality_metrics['avg_fragments_per_precursor']:.1f}")
print(f"  m/z range: {quality_metrics['mz_range']}")
print(f"  Charge distribution: {quality_metrics['charge_distribution']}")

# Generate comprehensive report
report_data = generate_library_report(loaded_lib, 'library_report.html')
print(f"Generated report with {len(report_data['sections'])} sections")
```
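
Integrity checks can be combined with the schema results to gate a library before export. A short sketch; the `passed` and `issues` keys on the integrity result are assumed for illustration, so inspect the returned dictionary in your version.

```python
from alphabase.spectral_library.validate import check_library_integrity

# Run the integrity check alongside the schema validation above
integrity = check_library_integrity(loaded_lib)
print(f"Integrity check results: {integrity}")

# Gate the export on both checks; 'passed'/'issues' keys are assumed here.
if validation_results.get('passed') and integrity.get('passed', True):
    print("Library passed validation and integrity checks")
else:
    print("Library has issues:")
    for issue in list(validation_results.get('issues', [])) + list(integrity.get('issues', [])):
        print(f"  - {issue}")
```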

### Parallel Library Processing

```python
from alphabase.spectral_library.translate import WritingProcess

# Process large library with multiple workers
writer = WritingProcess(n_processes=8)

# Write library in parallel
writer.write_library_parallel(
    spec_lib=loaded_lib,
    output_path='large_library.tsv',
    format_type='tsv',
    chunk_size=50000
)

# Write multiple formats simultaneously
format_paths = writer.write_multiple_formats(
    spec_lib=loaded_lib,
    base_path='library',
    formats=['tsv', 'csv', 'msp']
)
print(f"Created formats: {format_paths}")
```

### Advanced Validation Workflows

```python
from alphabase.spectral_library.validate import Required, Optional, Column

# Create detailed column specifications
columns = [
    Required('sequence', data_type=str),
    Required('charge', data_type=int),
    Required('proteins', data_type=str),
    Optional('rt', data_type=float, default_value=0.0),
    Column('mz', required=True, data_type=float, min_value=100.0, max_value=2000.0),
    Column('intensity', required=False, data_type=float, min_value=0.0)
]

# Validate each column specification
validation_results = []
for col_spec in columns:
    result = col_spec.validate(loaded_lib.precursor_df)
    validation_results.append(result)
    print(f"Column {col_spec.name}: {'PASS' if result['valid'] else 'FAIL'}")

# Custom validation workflow
def validate_library_for_dia_analysis(spec_lib):
    """Custom validation for DIA analysis requirements."""
    issues = []

    # Check for minimum precursors
    if len(spec_lib.precursor_df) < 1000:
        issues.append("Insufficient precursors for DIA analysis")

    # Check charge distribution
    charge_dist = spec_lib.precursor_df['charge'].value_counts()
    if charge_dist.get(2, 0) / len(spec_lib.precursor_df) < 0.3:
        issues.append("Low proportion of doubly charged precursors")

    # Check m/z coverage
    mz_min = spec_lib.precursor_df['mz'].min()
    mz_max = spec_lib.precursor_df['mz'].max()
    if mz_max - mz_min < 500:
        issues.append("Limited m/z range coverage")

    return {
        'suitable_for_dia': len(issues) == 0,
        'issues': issues,
        'precursor_count': len(spec_lib.precursor_df),
        'mz_range': (mz_min, mz_max),
        'charge_distribution': charge_dist.to_dict()
    }

# Apply custom validation
dia_validation = validate_library_for_dia_analysis(loaded_lib)
print(f"DIA suitability: {dia_validation}")
```