Tessl Tile for pypi/alphabase@1.6.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

advanced-peptide-operations.md advanced-spectral-libraries.md chemical-constants.md fragment-ions.md index.md io-utilities.md protein-analysis.md psm-readers.md quantification.md smiles-chemistry.md spectral-libraries.md

smiles-chemistry.mddocs/

0
# SMILES and Chemical Representations
1

2
This module provides comprehensive cheminformatics capabilities for working with SMILES (Simplified Molecular-Input Line-Entry System) representations of peptides and amino acids. It includes tools for chemical structure encoding, modification representation, and integration with computational chemistry workflows in proteomics.
3

4
## Capabilities
5

6
### Amino Acid SMILES Modification
7

8
Advanced system for modifying amino acid chemical structures using SMILES notation and applying chemical transformations.
9

10
```python { .api }
11
class AminoAcidModifier:
12
    """Chemical modification system for amino acids using SMILES representations."""
13
    
14
    def __init__(self):
15
        """Initialize amino acid modifier with default chemical data."""
16
    
17
    def get_amino_acid_smiles(self, aa_code: str) -> str:
18
        """
19
        Get SMILES representation for amino acid.
20
        
21
        Parameters:
22
        - aa_code: Single letter amino acid code
23
        
24
        Returns:
25
        SMILES string representing the amino acid structure
26
        """
27
    
28
    def apply_modification(self, aa_smiles: str, 
29
                          modification_smiles: str,
30
                          reaction_site: str = 'auto') -> str:
31
        """
32
        Apply chemical modification to amino acid SMILES.
33
        
34
        Parameters:
35
        - aa_smiles: Original amino acid SMILES
36
        - modification_smiles: SMILES of modification to apply
37
        - reaction_site: Specific reaction site or 'auto' for automatic detection
38
        
39
        Returns:
40
        Modified amino acid SMILES string
41
        """
42
    
43
    def create_modified_aa_library(self, modifications: List[dict]) -> dict:
44
        """
45
        Create library of modified amino acids.
46
        
47
        Parameters:
48
        - modifications: List of modification definitions with SMILES
49
        
50
        Returns:
51
        Dictionary mapping modified AA codes to SMILES representations
52
        """
53
    
54
    def validate_modification_chemistry(self, aa_code: str,
55
                                       modification: str) -> dict:
56
        """
57
        Validate chemical feasibility of amino acid modification.
58
        
59
        Parameters:
60
        - aa_code: Amino acid code
61
        - modification: Modification name or SMILES
62
        
63
        Returns:
64
        Dictionary with validation results and chemical properties
65
        """
66
    
67
    def calculate_modification_properties(self, modified_smiles: str) -> dict:
68
        """
69
        Calculate chemical properties of modified amino acid.
70
        
71
        Parameters:
72
        - modified_smiles: SMILES of modified amino acid
73
        
74
        Returns:
75
        Dictionary with molecular weight, formula, logP, etc.
76
        """
77
    
78
    def get_modification_reaction_mechanism(self, modification: str) -> dict:
79
        """
80
        Get reaction mechanism for modification.
81
        
82
        Parameters:
83
        - modification: Modification name
84
        
85
        Returns:
86
        Dictionary with reaction details and mechanism
87
        """
88

89
def load_amino_acid_smiles_database() -> dict:
90
    """
91
    Load comprehensive database of amino acid SMILES representations.
92
    
93
    Returns:
94
    Dictionary mapping amino acid codes to SMILES strings
95
    """
96

97
def create_custom_amino_acid(smiles: str, 
98
                           aa_code: str,
99
                           name: str = None) -> dict:
100
    """
101
    Create custom amino acid definition from SMILES.
102
    
103
    Parameters:
104
    - smiles: SMILES representation of the amino acid
105
    - aa_code: Single letter code for the amino acid
106
    - name: Optional full name for the amino acid
107
    
108
    Returns:
109
    Dictionary with amino acid definition
110
    """
111

112
def validate_amino_acid_smiles(smiles: str) -> dict:
113
    """
114
    Validate SMILES string for amino acid structure.
115
    
116
    Parameters:
117
    - smiles: SMILES string to validate
118
    
119
    Returns:
120
    Dictionary with validation results and structural analysis
121
    """
122
```
123

124
### Peptide SMILES Encoding
125

126
System for converting peptide sequences to complete chemical structure representations using SMILES notation.
127

128
```python { .api }
129
class PeptideSmilesEncoder:
130
    """Encoder for converting peptide sequences to SMILES representations."""
131
    
132
    def __init__(self, amino_acid_library: dict = None):
133
        """
134
        Initialize peptide SMILES encoder.
135
        
136
        Parameters:
137
        - amino_acid_library: Custom amino acid SMILES library
138
        """
139
    
140
    def encode_peptide_sequence(self, sequence: str,
141
                               modifications: List[dict] = None) -> str:
142
        """
143
        Convert peptide sequence to SMILES representation.
144
        
145
        Parameters:
146
        - sequence: Peptide sequence string
147
        - modifications: List of modifications with positions and SMILES
148
        
149
        Returns:
150
        Complete SMILES string representing the peptide structure
151
        """
152
    
153
    def encode_modified_peptide(self, sequence: str,
154
                               mod_names: List[str],
155
                               mod_sites: List[int]) -> str:
156
        """
157
        Encode peptide with standard modifications to SMILES.
158
        
159
        Parameters:
160
        - sequence: Peptide sequence
161
        - mod_names: List of modification names
162
        - mod_sites: List of modification sites (1-indexed)
163
        
164
        Returns:
165
        SMILES string with modifications incorporated
166
        """
167
    
168
    def create_peptide_graph(self, sequence: str,
169
                            modifications: List[dict] = None) -> object:
170
        """
171
        Create molecular graph representation of peptide.
172
        
173
        Parameters:
174
        - sequence: Peptide sequence
175
        - modifications: Optional modifications
176
        
177
        Returns:
178
        Molecular graph object (NetworkX or RDKit Mol)
179
        """
180
    
181
    def calculate_peptide_properties(self, peptide_smiles: str) -> dict:
182
        """
183
        Calculate chemical properties from peptide SMILES.
184
        
185
        Parameters:
186
        - peptide_smiles: SMILES representation of peptide
187
        
188
        Returns:
189
        Dictionary with molecular properties (MW, logP, PSA, etc.)
190
        """
191
    
192
    def generate_conformers(self, peptide_smiles: str,
193
                           num_conformers: int = 10) -> List[object]:
194
        """
195
        Generate 3D conformers from peptide SMILES.
196
        
197
        Parameters:
198
        - peptide_smiles: SMILES string of peptide
199
        - num_conformers: Number of conformers to generate
200
        
201
        Returns:
202
        List of 3D molecular conformer objects
203
        """
204
    
205
    def validate_peptide_structure(self, peptide_smiles: str) -> dict:
206
        """
207
        Validate peptide SMILES for structural correctness.
208
        
209
        Parameters:
210
        - peptide_smiles: Peptide SMILES to validate
211
        
212
        Returns:
213
        Dictionary with validation results and structural issues
214
        """
215
    
216
    def fragment_peptide_smiles(self, peptide_smiles: str,
217
                               fragmentation_type: str = 'b_y') -> dict:
218
        """
219
        Generate fragment ion SMILES from peptide SMILES.
220
        
221
        Parameters:
222
        - peptide_smiles: Parent peptide SMILES
223
        - fragmentation_type: Type of fragmentation ('b_y', 'cid', 'etd')
224
        
225
        Returns:
226
        Dictionary with fragment SMILES and masses
227
        """
228

229
def convert_sequence_to_smiles(sequence: str, 
230
                              modification_map: dict = None) -> str:
231
    """
232
    Convert peptide sequence to SMILES using default encoder.
233
    
234
    Parameters:
235
    - sequence: Peptide sequence string
236
    - modification_map: Optional modification mappings
237
    
238
    Returns:
239
    SMILES representation of the peptide
240
    """
241

242
def batch_encode_peptides(sequences: List[str],
243
                         modifications: List[List[dict]] = None,
244
                         n_jobs: int = 1) -> List[str]:
245
    """
246
    Batch encode multiple peptides to SMILES.
247
    
248
    Parameters:
249
    - sequences: List of peptide sequences
250
    - modifications: List of modification lists for each peptide
251
    - n_jobs: Number of parallel jobs
252
    
253
    Returns:
254
    List of SMILES strings for each peptide
255
    """
256
```
257

258
### Chemical Property Calculations
259

260
Functions for calculating molecular properties and descriptors from SMILES representations.
261

262
```python { .api }
263
def calculate_molecular_descriptors(smiles: str) -> dict:
264
    """
265
    Calculate comprehensive molecular descriptors from SMILES.
266
    
267
    Parameters:
268
    - smiles: SMILES string
269
    
270
    Returns:
271
    Dictionary with molecular descriptors (MW, logP, TPSA, etc.)
272
    """
273

274
def calculate_lipinski_properties(smiles: str) -> dict:
275
    """
276
    Calculate Lipinski Rule of Five properties.
277
    
278
    Parameters:
279
    - smiles: SMILES string
280
    
281
    Returns:
282
    Dictionary with Lipinski properties and compliance
283
    """
284

285
def calculate_peptide_hydrophobicity(smiles: str) -> float:
286
    """
287
    Calculate peptide hydrophobicity from SMILES.
288
    
289
    Parameters:
290
    - smiles: Peptide SMILES string
291
    
292
    Returns:
293
    Hydrophobicity score
294
    """
295

296
def predict_retention_time_from_smiles(smiles: str,
297
                                      model_type: str = 'krokhin') -> float:
298
    """
299
    Predict chromatographic retention time from SMILES.
300
    
301
    Parameters:
302
    - smiles: Peptide SMILES string
303
    - model_type: Prediction model ('krokhin', 'ssp', 'ml')
304
    
305
    Returns:
306
    Predicted retention time in minutes
307
    """
308

309
def calculate_collision_cross_section_smiles(smiles: str,
310
                                           charge: int = 1) -> float:
311
    """
312
    Predict collision cross section from SMILES structure.
313
    
314
    Parameters:
315
    - smiles: Peptide SMILES string
316
    - charge: Ion charge state
317
    
318
    Returns:
319
    Predicted CCS value in Ų
320
    """
321

322
def assess_fragmentation_propensity(smiles: str) -> dict:
323
    """
324
    Assess peptide fragmentation propensity from structure.
325
    
326
    Parameters:
327
    - smiles: Peptide SMILES string
328
    
329
    Returns:
330
    Dictionary with fragmentation predictions
331
    """
332
```
333

334
### Structural Analysis and Comparison
335

336
Tools for analyzing and comparing chemical structures using SMILES representations.
337

338
```python { .api }
339
def calculate_tanimoto_similarity(smiles1: str, smiles2: str) -> float:
340
    """
341
    Calculate Tanimoto similarity between two SMILES structures.
342
    
343
    Parameters:
344
    - smiles1: First SMILES string
345
    - smiles2: Second SMILES string
346
    
347
    Returns:
348
    Tanimoto similarity coefficient (0-1)
349
    """
350

351
def find_structural_motifs(smiles: str, motif_patterns: List[str]) -> dict:
352
    """
353
    Find structural motifs in SMILES representation.
354
    
355
    Parameters:
356
    - smiles: SMILES string to analyze
357
    - motif_patterns: List of SMARTS patterns to search for
358
    
359
    Returns:
360
    Dictionary with motif matches and positions
361
    """
362

363
def cluster_peptides_by_structure(smiles_list: List[str],
364
                                 similarity_threshold: float = 0.7) -> List[List[int]]:
365
    """
366
    Cluster peptides based on structural similarity.
367
    
368
    Parameters:
369
    - smiles_list: List of SMILES strings
370
    - similarity_threshold: Minimum similarity for clustering
371
    
372
    Returns:
373
    List of clusters (lists of indices)
374
    """
375

376
def generate_structural_fingerprints(smiles: str,
377
                                   fingerprint_type: str = 'morgan') -> object:
378
    """
379
    Generate molecular fingerprints from SMILES.
380
    
381
    Parameters:
382
    - smiles: SMILES string
383
    - fingerprint_type: Type of fingerprint ('morgan', 'maccs', 'topological')
384
    
385
    Returns:
386
    Molecular fingerprint object
387
    """
388

389
def identify_functional_groups(smiles: str) -> List[dict]:
390
    """
391
    Identify functional groups in SMILES structure.
392
    
393
    Parameters:
394
    - smiles: SMILES string to analyze
395
    
396
    Returns:
397
    List of functional group identifications
398
    """
399
```
400

401
### Modification Database Integration
402

403
Integration with modification databases and chemical reaction systems.
404

405
```python { .api }
406
class ModificationSmilesDatabase:
407
    """Database for modification SMILES and reaction patterns."""
408
    
409
    def __init__(self, database_path: str = None):
410
        """
411
        Initialize modification database.
412
        
413
        Parameters:
414
        - database_path: Path to custom modification database
415
        """
416
    
417
    def get_modification_smiles(self, modification_name: str) -> str:
418
        """
419
        Get SMILES representation for named modification.
420
        
421
        Parameters:
422
        - modification_name: Standard modification name
423
        
424
        Returns:
425
        SMILES string representing the modification
426
        """
427
    
428
    def add_custom_modification(self, name: str, 
429
                               smiles: str,
430
                               reaction_pattern: str = None) -> None:
431
        """
432
        Add custom modification to database.
433
        
434
        Parameters:
435
        - name: Modification name
436
        - smiles: SMILES representation
437
        - reaction_pattern: SMARTS reaction pattern
438
        """
439
    
440
    def search_modifications_by_structure(self, query_smiles: str,
441
                                         similarity_threshold: float = 0.8) -> List[dict]:
442
        """
443
        Search modifications by structural similarity.
444
        
445
        Parameters:
446
        - query_smiles: Query SMILES structure
447
        - similarity_threshold: Minimum similarity threshold
448
        
449
        Returns:
450
        List of matching modifications with similarity scores
451
        """
452
    
453
    def get_reaction_products(self, reactant_smiles: str,
454
                             modification_name: str) -> List[str]:
455
        """
456
        Get reaction products for modification.
457
        
458
        Parameters:
459
        - reactant_smiles: Starting amino acid SMILES
460
        - modification_name: Modification to apply
461
        
462
        Returns:
463
        List of possible product SMILES
464
        """
465

466
def load_unimod_smiles_mappings() -> dict:
467
    """
468
    Load SMILES mappings for UniMod modifications.
469
    
470
    Returns:
471
    Dictionary mapping UniMod IDs to SMILES representations
472
    """
473

474
def create_reaction_template(reactant_pattern: str,
475
                           product_pattern: str) -> object:
476
    """
477
    Create reaction template from SMARTS patterns.
478
    
479
    Parameters:
480
    - reactant_pattern: SMARTS pattern for reactants
481
    - product_pattern: SMARTS pattern for products
482
    
483
    Returns:
484
    Reaction template object
485
    """
486
```
487

488
### Visualization and Export
489

490
Tools for visualizing chemical structures and exporting to various formats.
491

492
```python { .api }
493
def visualize_peptide_structure(smiles: str,
494
                               output_path: str = None,
495
                               format: str = 'png') -> object:
496
    """
497
    Visualize peptide structure from SMILES.
498
    
499
    Parameters:
500
    - smiles: SMILES string to visualize
501
    - output_path: Optional path to save image
502
    - format: Output format ('png', 'svg', 'pdf')
503
    
504
    Returns:
505
    Image object or None if saved to file
506
    """
507

508
def export_to_sdf(smiles_list: List[str],
509
                 output_path: str,
510
                 include_properties: bool = True) -> None:
511
    """
512
    Export SMILES list to SDF format.
513
    
514
    Parameters:
515
    - smiles_list: List of SMILES strings
516
    - output_path: Output SDF file path
517
    - include_properties: Include calculated properties
518
    """
519

520
def export_to_mol2(smiles: str, output_path: str) -> None:
521
    """
522
    Export SMILES to MOL2 format with 3D coordinates.
523
    
524
    Parameters:
525
    - smiles: SMILES string
526
    - output_path: Output MOL2 file path
527
    """
528

529
def create_structure_grid(smiles_list: List[str],
530
                         labels: List[str] = None,
531
                         grid_size: tuple = (4, 4)) -> object:
532
    """
533
    Create grid visualization of multiple structures.
534
    
535
    Parameters:
536
    - smiles_list: List of SMILES to visualize
537
    - labels: Optional labels for each structure
538
    - grid_size: Grid dimensions (rows, columns)
539
    
540
    Returns:
541
    Grid image object
542
    """
543
```
544

545
## Usage Examples
546

547
### Basic Amino Acid Modification
548

549
```python
550
from alphabase.smiles.smiles import AminoAcidModifier
551
from alphabase.smiles.peptide import PeptideSmilesEncoder
552

553
# Initialize amino acid modifier
554
modifier = AminoAcidModifier()
555

556
# Get SMILES for standard amino acid
557
cys_smiles = modifier.get_amino_acid_smiles('C')
558
print(f"Cysteine SMILES: {cys_smiles}")
559

560
# Apply carbamidomethyl modification
561
carbamidomethyl_smiles = "CC(=O)N"  # Simplified modification SMILES
562
modified_cys = modifier.apply_modification(
563
    aa_smiles=cys_smiles,
564
    modification_smiles=carbamidomethyl_smiles,
565
    reaction_site='thiol'
566
)
567
print(f"Carbamidomethyl cysteine SMILES: {modified_cys}")
568

569
# Calculate properties of modified amino acid
570
properties = modifier.calculate_modification_properties(modified_cys)
571
print(f"Modified cysteine properties: {properties}")
572
```
573

574
### Peptide SMILES Encoding
575

576
```python
577
from alphabase.smiles.peptide import PeptideSmilesEncoder, convert_sequence_to_smiles
578

579
# Initialize peptide encoder
580
encoder = PeptideSmilesEncoder()
581

582
# Simple peptide encoding
583
peptide_seq = "PEPTIDE"
584
peptide_smiles = encoder.encode_peptide_sequence(peptide_seq)
585
print(f"Peptide SMILES: {peptide_smiles}")
586

587
# Encode peptide with modifications
588
modified_seq = "PEPTIDE"
589
mod_names = ["Oxidation (M)"]
590
mod_sites = [1]  # First position (1-indexed)
591

592
modified_smiles = encoder.encode_modified_peptide(
593
    sequence=modified_seq,
594
    mod_names=mod_names,
595
    mod_sites=mod_sites
596
)
597
print(f"Modified peptide SMILES: {modified_smiles}")
598

599
# Calculate peptide properties
600
properties = encoder.calculate_peptide_properties(peptide_smiles)
601
print(f"Peptide properties:")
602
print(f"  Molecular weight: {properties['molecular_weight']:.2f}")
603
print(f"  LogP: {properties['logp']:.2f}")
604
print(f"  Polar surface area: {properties['tpsa']:.2f}")
605
```
606

607
### Batch Processing and Analysis
608

609
```python
610
from alphabase.smiles.peptide import batch_encode_peptides
611
from alphabase.smiles.smiles import calculate_molecular_descriptors
612

613
# Batch encode multiple peptides
614
sequences = ["PEPTIDE", "SEQUENCE", "EXAMPLE", "TESTING"]
615
batch_smiles = batch_encode_peptides(sequences, n_jobs=2)
616

617
print(f"Encoded {len(batch_smiles)} peptides to SMILES")
618

619
# Calculate descriptors for all peptides
620
descriptors_list = []
621
for i, smiles in enumerate(batch_smiles):
622
    descriptors = calculate_molecular_descriptors(smiles)
623
    descriptors['sequence'] = sequences[i]
624
    descriptors_list.append(descriptors)
625

626
# Analyze molecular properties
627
import pandas as pd
628
descriptors_df = pd.DataFrame(descriptors_list)
629
print(f"Molecular descriptor statistics:")
630
print(descriptors_df[['molecular_weight', 'logp', 'tpsa']].describe())
631
```
632

633
### Chemical Property Predictions
634

635
```python
636
from alphabase.smiles.smiles import (
637
    calculate_lipinski_properties, predict_retention_time_from_smiles,
638
    calculate_collision_cross_section_smiles
639
)
640

641
# Example peptide SMILES
642
peptide_smiles = batch_smiles[0]  # From previous example
643

644
# Lipinski properties
645
lipinski = calculate_lipinski_properties(peptide_smiles)
646
print(f"Lipinski properties: {lipinski}")
647
print(f"Rule of 5 compliant: {lipinski['compliant']}")
648

649
# Predict retention time
650
predicted_rt = predict_retention_time_from_smiles(peptide_smiles, model_type='krokhin')
651
print(f"Predicted retention time: {predicted_rt:.2f} minutes")
652

653
# Predict collision cross section
654
predicted_ccs = calculate_collision_cross_section_smiles(peptide_smiles, charge=2)
655
print(f"Predicted CCS: {predicted_ccs:.1f} Ų")
656
```
657

658
### Structural Analysis and Similarity
659

660
```python
661
from alphabase.smiles.smiles import (
662
    calculate_tanimoto_similarity, cluster_peptides_by_structure,
663
    find_structural_motifs
664
)
665

666
# Calculate similarity between peptides
667
smiles1 = batch_smiles[0]
668
smiles2 = batch_smiles[1]
669
similarity = calculate_tanimoto_similarity(smiles1, smiles2)
670
print(f"Tanimoto similarity: {similarity:.3f}")
671

672
# Cluster peptides by structural similarity
673
clusters = cluster_peptides_by_structure(batch_smiles, similarity_threshold=0.6)
674
print(f"Found {len(clusters)} structural clusters")
675
for i, cluster in enumerate(clusters):
676
    cluster_sequences = [sequences[idx] for idx in cluster]
677
    print(f"  Cluster {i+1}: {cluster_sequences}")
678

679
# Find structural motifs
680
motif_patterns = [
681
    "CC(C)C",  # Leucine/Isoleucine side chain
682
    "c1ccccc1",  # Benzene ring (Phe, Tyr, Trp)
683
    "CCC(=O)O"  # Carboxylic acid
684
]
685

686
for smiles in batch_smiles[:2]:
687
    motifs = find_structural_motifs(smiles, motif_patterns)
688
    print(f"Motifs in {smiles[:20]}...")
689
    for pattern, matches in motifs.items():
690
        if matches:
691
            print(f"  Found pattern {pattern}: {len(matches)} matches")
692
```
693

694
### Modification Database Operations
695

696
```python
697
from alphabase.smiles.smiles import ModificationSmilesDatabase, load_unimod_smiles_mappings
698

699
# Load modification database
700
mod_db = ModificationSmilesDatabase()
701

702
# Get SMILES for common modifications
703
oxidation_smiles = mod_db.get_modification_smiles("Oxidation (M)")
704
phospho_smiles = mod_db.get_modification_smiles("Phospho (STY)")
705
print(f"Oxidation SMILES: {oxidation_smiles}")
706
print(f"Phosphorylation SMILES: {phospho_smiles}")
707

708
# Add custom modification
709
mod_db.add_custom_modification(
710
    name="Custom_Alkylation",
711
    smiles="CCCC",
712
    reaction_pattern="[#6:1]>>[#6:1]CCCC"
713
)
714

715
# Search for structurally similar modifications
716
query_smiles = "CC(=O)N"
717
similar_mods = mod_db.search_modifications_by_structure(
718
    query_smiles, 
719
    similarity_threshold=0.7
720
)
721
print(f"Similar modifications: {[mod['name'] for mod in similar_mods]}")
722

723
# Load UniMod mappings
724
unimod_smiles = load_unimod_smiles_mappings()
725
print(f"Loaded {len(unimod_smiles)} UniMod SMILES mappings")
726
```
727

728
### Fragment Ion Analysis
729

730
```python
731
# Generate fragment ion SMILES
732
fragment_smiles = encoder.fragment_peptide_smiles(
733
    peptide_smiles=batch_smiles[0],
734
    fragmentation_type='b_y'
735
)
736

737
print(f"Generated fragments:")
738
for frag_type, frag_list in fragment_smiles.items():
739
    print(f"  {frag_type} ions: {len(frag_list)}")
740
    for frag in frag_list[:3]:  # Show first 3
741
        print(f"    SMILES: {frag['smiles'][:30]}...")
742
        print(f"    Mass: {frag['mass']:.4f}")
743
```
744

745
### Visualization and Export
746

747
```python
748
from alphabase.smiles.smiles import (
749
    visualize_peptide_structure, export_to_sdf, create_structure_grid
750
)
751

752
# Visualize single peptide structure
753
structure_img = visualize_peptide_structure(
754
    smiles=batch_smiles[0],
755
    output_path='peptide_structure.png',
756
    format='png'
757
)
758

759
# Create grid visualization of multiple structures
760
grid_img = create_structure_grid(
761
    smiles_list=batch_smiles,
762
    labels=sequences,
763
    grid_size=(2, 2)
764
)
765

766
# Export to SDF format for external analysis
767
export_to_sdf(
768
    smiles_list=batch_smiles,
769
    output_path='peptides.sdf',
770
    include_properties=True
771
)
772

773
print("Visualization and export completed")
774
```
775

776
### Advanced Chemical Analysis
777

778
```python
779
from alphabase.smiles.smiles import (
780
    generate_structural_fingerprints, identify_functional_groups,
781
    assess_fragmentation_propensity
782
)
783

784
# Generate molecular fingerprints
785
fingerprint = generate_structural_fingerprints(
786
    smiles=batch_smiles[0],
787
    fingerprint_type='morgan'
788
)
789
print(f"Generated molecular fingerprint with {len(fingerprint)} bits")
790

791
# Identify functional groups
792
functional_groups = identify_functional_groups(batch_smiles[0])
793
print(f"Functional groups identified:")
794
for group in functional_groups:
795
    print(f"  {group['name']}: {group['count']} occurrences")
796

797
# Assess fragmentation propensity
798
fragmentation = assess_fragmentation_propensity(batch_smiles[0])
799
print(f"Fragmentation analysis:")
800
print(f"  Predicted dominant fragments: {fragmentation['dominant_fragments']}")
801
print(f"  Fragmentation score: {fragmentation['fragmentation_score']:.2f}")
802
```
803

804
### Integration with Proteomics Workflows
805

806
```python
807
# Example: Integrate SMILES with spectral library
808
from alphabase.spectral_library.base import SpecLibBase
809
import pandas as pd
810

811
# Create spectral library with SMILES information
812
spec_lib = SpecLibBase()
813
spec_lib.precursor_df = pd.DataFrame({
814
    'sequence': sequences,
815
    'mods': ['', '', '', ''],
816
    'charge': [2, 2, 2, 2],
817
    'proteins': ['P1', 'P2', 'P3', 'P4'],
818
    'smiles': batch_smiles
819
})
820

821
# Add chemical properties to spectral library
822
for idx, row in spec_lib.precursor_df.iterrows():
823
    props = calculate_molecular_descriptors(row['smiles'])
824
    spec_lib.precursor_df.loc[idx, 'molecular_weight'] = props['molecular_weight']
825
    spec_lib.precursor_df.loc[idx, 'logp'] = props['logp']
826
    spec_lib.precursor_df.loc[idx, 'predicted_rt'] = predict_retention_time_from_smiles(row['smiles'])
827

828
print(f"Enhanced spectral library with chemical properties:")
829
print(spec_lib.precursor_df[['sequence', 'molecular_weight', 'logp', 'predicted_rt']].head())
830
```

Version

Tile

Files

smiles-chemistry.mddocs/

smiles-chemistry.mddocs/