# Peptide and Protein Analysis

Comprehensive support for peptide sequence analysis, database search results, and protein identification workflows. Includes modification handling, sequence properties, and identification result processing.

## Capabilities

### Peptide Sequences

#### AASequence

Amino acid sequence representation with comprehensive modification support.

```python { .api }
class AASequence:
    """Amino acid sequence representation with comprehensive modification support."""

    def __init__(self) -> None: ...

    @staticmethod
    def fromString(sequence: str) -> AASequence:
        """
        Create sequence from string notation.

        Args:
            sequence (str): Peptide sequence with modifications (e.g., "PEPTIDE", "PEPTIDEM(Oxidation)")

        Returns:
            AASequence: Parsed amino acid sequence
        """

    def toString(self) -> str:
        """
        Convert sequence to string notation.

        Returns:
            str: String representation with modifications
        """

    def toUnmodifiedString(self) -> str:
        """
        Get unmodified sequence string.

        Returns:
            str: Plain amino acid sequence without modifications
        """

    def size(self) -> int:
        """
        Get sequence length.

        Returns:
            int: Number of amino acids
        """

    def empty(self) -> bool:
        """
        Check if sequence is empty.

        Returns:
            bool: True if empty sequence
        """

    def getMonoWeight(self) -> float:
        """
        Get monoisotopic molecular weight.

        Returns:
            float: Monoisotopic weight in Da
        """

    def getAverageWeight(self) -> float:
        """
        Get average molecular weight.

        Returns:
            float: Average weight in Da
        """

    def getFormula(self) -> EmpiricalFormula:
        """
        Get empirical formula.

        Returns:
            EmpiricalFormula: Molecular formula
        """

    def getResidue(self, index: int) -> Residue:
        """
        Get residue at position.

        Args:
            index (int): Position index (0-based)

        Returns:
            Residue: Amino acid residue
        """

    def setModification(self, index: int, modification: str) -> None:
        """
        Set modification at position.

        Args:
            index (int): Position index
            modification (str): Modification name
        """

    def getNTerminalModification(self) -> ResidueModification:
        """
        Get N-terminal modification.

        Returns:
            ResidueModification: N-terminal modification
        """

    def setNTerminalModification(self, mod: ResidueModification) -> None:
        """
        Set N-terminal modification.

        Args:
            mod (ResidueModification): N-terminal modification
        """

    def getCTerminalModification(self) -> ResidueModification:
        """
        Get C-terminal modification.

        Returns:
            ResidueModification: C-terminal modification
        """

    def setCTerminalModification(self, mod: ResidueModification) -> None:
        """
        Set C-terminal modification.

        Args:
            mod (ResidueModification): C-terminal modification
        """

    def setModificationByDiffMonoMass(self, index: int, diff_mono_mass: float) -> None:
        """
        Set modification by monoisotopic mass difference.

        Args:
            index (int): Position index
            diff_mono_mass (float): Mass difference in Da
        """

    def setNTerminalModificationByDiffMonoMass(self, diff_mono_mass: float, protein_term: bool = False) -> None:
        """
        Set N-terminal modification by mass difference.

        Args:
            diff_mono_mass (float): Mass difference in Da
            protein_term (bool): Whether this is a protein N-terminus
        """

    def setCTerminalModificationByDiffMonoMass(self, diff_mono_mass: float, protein_term: bool = False) -> None:
        """
        Set C-terminal modification by mass difference.

        Args:
            diff_mono_mass (float): Mass difference in Da
            protein_term (bool): Whether this is a protein C-terminus
        """

    def toUniModString(self) -> str:
        """
        Convert to UniMod-style string notation.

        Returns:
            str: UniMod-style string representation
        """

    def toBracketString(self, integer_mass: bool = True, mass_delta: bool = False) -> str:
        """
        Create TPP-compatible bracket notation string.

        Args:
            integer_mass (bool): Use integer masses
            mass_delta (bool): Show mass deltas instead of absolute masses

        Returns:
            str: Bracket notation string
        """

    def getPrefix(self, index: int) -> AASequence:
        """
        Get prefix sequence of specified length.

        Args:
            index (int): Length of prefix

        Returns:
            AASequence: Prefix sequence
        """

    def getSuffix(self, index: int) -> AASequence:
        """
        Get suffix sequence of specified length.

        Args:
            index (int): Length of suffix

        Returns:
            AASequence: Suffix sequence
        """

    def getSubsequence(self, index: int, length: int) -> AASequence:
        """
        Get subsequence starting at position.

        Args:
            index (int): Start position
            length (int): Length of subsequence

        Returns:
            AASequence: Subsequence
        """

    def getMZ(self, charge: int) -> float:
        """
        Get m/z value for given charge state.

        Args:
            charge (int): Charge state

        Returns:
            float: m/z value
        """

    def has(self, residue: Residue) -> bool:
        """
        Check if sequence contains residue.

        Args:
            residue (Residue): Residue to search for

        Returns:
            bool: True if residue found
        """

    def hasSubsequence(self, peptide: AASequence) -> bool:
        """
        Check if sequence contains subsequence.

        Args:
            peptide (AASequence): Subsequence to search for

        Returns:
            bool: True if subsequence found
        """

    def hasPrefix(self, peptide: AASequence) -> bool:
        """
        Check if sequence has given prefix.

        Args:
            peptide (AASequence): Prefix to check

        Returns:
            bool: True if sequence starts with prefix
        """

    def hasSuffix(self, peptide: AASequence) -> bool:
        """
        Check if sequence has given suffix.

        Args:
            peptide (AASequence): Suffix to check

        Returns:
            bool: True if sequence ends with suffix
        """

    def hasNTerminalModification(self) -> bool:
        """
        Check if N-terminal modification present.

        Returns:
            bool: True if N-terminal modification present
        """

    def hasCTerminalModification(self) -> bool:
        """
        Check if C-terminal modification present.

        Returns:
            bool: True if C-terminal modification present
        """

    def isModified(self) -> bool:
        """
        Check if sequence has modifications.

        Returns:
            bool: True if any modifications present
        """
```

### Amino Acid Properties

#### Residue

Individual amino acid residue with chemical properties.

```python { .api }
class Residue:
    """Individual amino acid residue with chemical properties."""

    def __init__(self) -> None: ...

    def getName(self) -> str:
        """
        Get residue name.

        Returns:
            str: Full residue name
        """

    def getOneLetterCode(self) -> str:
        """
        Get single letter code.

        Returns:
            str: One letter amino acid code
        """

    def getThreeLetterCode(self) -> str:
        """
        Get three letter code.

        Returns:
            str: Three letter amino acid code
        """

    def getMonoWeight(self) -> float:
        """
        Get monoisotopic weight.

        Returns:
            float: Monoisotopic weight in Da
        """

    def getAverageWeight(self) -> float:
        """
        Get average weight.

        Returns:
            float: Average weight in Da
        """

    def getFormula(self) -> EmpiricalFormula:
        """
        Get empirical formula.

        Returns:
            EmpiricalFormula: Chemical formula
        """

    def isModified(self) -> bool:
        """
        Check if residue is modified.

        Returns:
            bool: True if modified
        """

    def getModification(self) -> ResidueModification:
        """
        Get modification.

        Returns:
            ResidueModification: Residue modification
        """


class ResidueDB:
    """Residue database, accessed through a singleton instance."""

    @staticmethod
    def getInstance() -> ResidueDB:
        """
        Get singleton instance.

        Returns:
            ResidueDB: Residue database instance
        """

    def getResidue(self, name: str) -> Residue:
        """
        Get residue by name or code.

        Args:
            name (str): Residue name or code

        Returns:
            Residue: Amino acid residue
        """

    def getResidues(self) -> set[Residue]:
        """
        Get all residues.

        Returns:
            set[Residue]: All available residues
        """
```

### Modifications

#### ResidueModification

Post-translational and chemical modifications.

```python { .api }
class ResidueModification:
    """Post-translational or chemical modification of a residue."""

    def __init__(self) -> None: ...

    def getId(self) -> str:
        """
        Get modification ID.

        Returns:
            str: Unique modification identifier
        """

    def getFullId(self) -> str:
        """
        Get full modification ID.

        Returns:
            str: Complete modification identifier
        """

    def getFullName(self) -> str:
        """
        Get full modification name.

        Returns:
            str: Complete modification name
        """

    def getName(self) -> str:
        """
        Get short modification name.

        Returns:
            str: Short name
        """

    def getMonoMass(self) -> float:
        """
        Get monoisotopic mass delta.

        Returns:
            float: Mass change in Da
        """

    def getAverageMass(self) -> float:
        """
        Get average mass delta.

        Returns:
            float: Average mass change in Da
        """

    def getDiffFormula(self) -> EmpiricalFormula:
        """
        Get formula difference.

        Returns:
            EmpiricalFormula: Chemical formula change
        """

    def getOrigin(self) -> str:
        """
        Get amino acid origin.

        Returns:
            str: Target amino acid
        """

    def getClassification(self) -> TermSpecificity:
        """
        Get classification.

        Returns:
            TermSpecificity: Modification classification
        """


class ModificationsDB:
    """Modifications database, accessed through a singleton instance."""

    @staticmethod
    def getInstance() -> ModificationsDB:
        """
        Get singleton instance.

        Returns:
            ModificationsDB: Modifications database instance
        """

    def getModification(self, name: str) -> ResidueModification:
        """
        Get modification by name.

        Args:
            name (str): Modification name

        Returns:
            ResidueModification: The modification
        """

    def getAllSearchModifications(self) -> list[ResidueModification]:
        """
        Get all search modifications.

        Returns:
            list[ResidueModification]: Available search modifications
        """

    def getModifications(self, residue: str, term_spec: TermSpecificity) -> list[ResidueModification]:
        """
        Get modifications for residue.

        Args:
            residue (str): Target amino acid
            term_spec (TermSpecificity): Terminal specificity

        Returns:
            list[ResidueModification]: Applicable modifications
        """
```

### Database Search Results

#### PeptideIdentification

Peptide identification from database search.

```python { .api }
class PeptideIdentification:
    """Peptide identification from database search."""

    def __init__(self) -> None: ...

    def getHits(self) -> list[PeptideHit]:
        """
        Get peptide hits.

        Returns:
            list[PeptideHit]: List of peptide hits
        """

    def setHits(self, hits: list[PeptideHit]) -> None:
        """
        Set peptide hits.

        Args:
            hits (list[PeptideHit]): Peptide hits to set
        """

    def insertHit(self, hit: PeptideHit) -> None:
        """
        Add peptide hit.

        Args:
            hit (PeptideHit): Peptide hit to add
        """

    def getRT(self) -> float:
        """
        Get retention time.

        Returns:
            float: Retention time in seconds
        """

    def setRT(self, rt: float) -> None:
        """
        Set retention time.

        Args:
            rt (float): Retention time in seconds
        """

    def getMZ(self) -> float:
        """
        Get precursor m/z.

        Returns:
            float: Precursor m/z
        """

    def setMZ(self, mz: float) -> None:
        """
        Set precursor m/z.

        Args:
            mz (float): Precursor m/z
        """

    def getScoreType(self) -> str:
        """
        Get score type name.

        Returns:
            str: Score type (e.g., "expect", "mascot_score")
        """

    def setScoreType(self, score_type: str) -> None:
        """
        Set score type.

        Args:
            score_type (str): Score type name
        """

    def getIdentifier(self) -> str:
        """
        Get identification run identifier.

        Returns:
            str: Run identifier
        """

    def setIdentifier(self, id: str) -> None:
        """
        Set identification run identifier.

        Args:
            id (str): Run identifier
        """

    def isHigherScoreBetter(self) -> bool:
        """
        Check if higher score is better.

        Returns:
            bool: True if higher score indicates better match
        """

    def setHigherScoreBetter(self, higher_better: bool) -> None:
        """
        Set score direction.

        Args:
            higher_better (bool): True if higher score is better
        """

    def getSignificanceThreshold(self) -> float:
        """
        Get significance threshold.

        Returns:
            float: Significance threshold
        """

    def setSignificanceThreshold(self, threshold: float) -> None:
        """
        Set significance threshold.

        Args:
            threshold (float): Significance threshold
        """
```

#### PeptideHit

Individual peptide hit with sequence and score.

```python { .api }
class PeptideHit:
    """Individual peptide hit with sequence and score."""

    def __init__(self) -> None: ...

    def getSequence(self) -> AASequence:
        """
        Get peptide sequence.

        Returns:
            AASequence: Peptide sequence with modifications
        """

    def setSequence(self, sequence: AASequence) -> None:
        """
        Set peptide sequence.

        Args:
            sequence (AASequence): Peptide sequence
        """

    def getScore(self) -> float:
        """
        Get peptide score.

        Returns:
            float: Search engine score
        """

    def setScore(self, score: float) -> None:
        """
        Set peptide score.

        Args:
            score (float): Search engine score
        """

    def getRank(self) -> int:
        """
        Get peptide rank.

        Returns:
            int: Rank among hits (1-based)
        """

    def setRank(self, rank: int) -> None:
        """
        Set peptide rank.

        Args:
            rank (int): Rank among hits
        """

    def getCharge(self) -> int:
        """
        Get peptide charge.

        Returns:
            int: Charge state
        """

    def setCharge(self, charge: int) -> None:
        """
        Set peptide charge.

        Args:
            charge (int): Charge state
        """

    def getPeptideEvidences(self) -> list[PeptideEvidence]:
        """
        Get peptide evidences.

        Returns:
            list[PeptideEvidence]: List of peptide evidences
        """

    def setPeptideEvidences(self, evidences: list[PeptideEvidence]) -> None:
        """
        Set peptide evidences.

        Args:
            evidences (list[PeptideEvidence]): Peptide evidences
        """

    def addPeptideEvidence(self, evidence: PeptideEvidence) -> None:
        """
        Add peptide evidence.

        Args:
            evidence (PeptideEvidence): Peptide evidence to add
        """


class PeptideEvidence:
    """Placement of a peptide in a protein: accession, start/end positions, and flanking amino acids."""

    def __init__(self) -> None: ...

    def getProteinAccession(self) -> str:
        """
        Get protein accession.

        Returns:
            str: Protein accession number
        """

    def setProteinAccession(self, accession: str) -> None:
        """
        Set protein accession.

        Args:
            accession (str): Protein accession number
        """

    def getStart(self) -> int:
        """
        Get start position in protein.

        Returns:
            int: Start position (0-based)
        """

    def setStart(self, start: int) -> None:
        """
        Set start position.

        Args:
            start (int): Start position
        """

    def getEnd(self) -> int:
        """
        Get end position in protein.

        Returns:
            int: End position (0-based)
        """

    def setEnd(self, end: int) -> None:
        """
        Set end position.

        Args:
            end (int): End position
        """

    def getAABefore(self) -> str:
        """
        Get amino acid before peptide.

        Returns:
            str: Amino acid before cleavage site
        """

    def getAAAfter(self) -> str:
        """
        Get amino acid after peptide.

        Returns:
            str: Amino acid after cleavage site
        """
```

### Protein Identification

#### ProteinIdentification

Protein identification run information and results.

```python { .api }
class ProteinIdentification:
    """Protein identification run information and results."""

    def __init__(self) -> None: ...

    def getHits(self) -> list[ProteinHit]:
        """
        Get protein hits.

        Returns:
            list[ProteinHit]: List of protein hits
        """

    def setHits(self, hits: list[ProteinHit]) -> None:
        """
        Set protein hits.

        Args:
            hits (list[ProteinHit]): Protein hits to set
        """

    def insertHit(self, hit: ProteinHit) -> None:
        """
        Add protein hit.

        Args:
            hit (ProteinHit): Protein hit to add
        """

    def getIdentifier(self) -> str:
        """
        Get identification run identifier.

        Returns:
            str: Run identifier
        """

    def setIdentifier(self, id: str) -> None:
        """
        Set identification run identifier.

        Args:
            id (str): Run identifier
        """

    def getSearchEngine(self) -> str:
        """
        Get search engine name.

        Returns:
            str: Search engine name
        """

    def setSearchEngine(self, engine: str) -> None:
        """
        Set search engine name.

        Args:
            engine (str): Search engine name
        """

    def getSearchEngineVersion(self) -> str:
        """
        Get search engine version.

        Returns:
            str: Search engine version
        """

    def getSearchParameters(self) -> SearchParameters:
        """
        Get search parameters.

        Returns:
            SearchParameters: Search configuration
        """

    def setSearchParameters(self, params: SearchParameters) -> None:
        """
        Set search parameters.

        Args:
            params (SearchParameters): Search configuration
        """

    def getDateTime(self) -> DateTime:
        """
        Get identification date/time.

        Returns:
            DateTime: Identification timestamp
        """

    def getPrimaryMSRunPath(self, paths: list[str]) -> None:
        """
        Get primary MS run paths.

        The paths are delivered through the ``paths`` argument rather than
        the return value.

        Args:
            paths (list[str]): List to populate with paths
        """


class ProteinHit:
    """Individual protein hit with accession, score, rank, sequence, description and coverage."""

    def __init__(self) -> None: ...

    def getAccession(self) -> str:
        """
        Get protein accession.

        Returns:
            str: Protein accession number
        """

    def setAccession(self, accession: str) -> None:
        """
        Set protein accession.

        Args:
            accession (str): Protein accession number
        """

    def getScore(self) -> float:
        """
        Get protein score.

        Returns:
            float: Protein score
        """

    def setScore(self, score: float) -> None:
        """
        Set protein score.

        Args:
            score (float): Protein score
        """

    def getRank(self) -> int:
        """
        Get protein rank.

        Returns:
            int: Rank among hits (1-based)
        """

    def setRank(self, rank: int) -> None:
        """
        Set protein rank.

        Args:
            rank (int): Rank among hits
        """

    def getSequence(self) -> str:
        """
        Get protein sequence.

        Returns:
            str: Full protein sequence
        """

    def setSequence(self, sequence: str) -> None:
        """
        Set protein sequence.

        Args:
            sequence (str): Full protein sequence
        """

    def getDescription(self) -> str:
        """
        Get protein description.

        Returns:
            str: Protein description/name
        """

    def setDescription(self, description: str) -> None:
        """
        Set protein description.

        Args:
            description (str): Protein description
        """

    def getCoverage(self) -> float:
        """
        Get sequence coverage.

        Returns:
            float: Sequence coverage percentage
        """

    def setCoverage(self, coverage: float) -> None:
        """
        Set sequence coverage.

        Args:
            coverage (float): Coverage percentage
        """
```

### Database Search Configuration

#### SearchParameters

Search engine configuration parameters.

```python { .api }
class SearchParameters:
    """Search engine configuration parameters."""

    def __init__(self) -> None: ...

    def getDatabase(self) -> str:
        """
        Get database filename.

        Returns:
            str: Database file path
        """

    def setDatabase(self, database: str) -> None:
        """
        Set database filename.

        Args:
            database (str): Database file path
        """

    def getEnzyme(self) -> str:
        """
        Get enzyme name.

        Returns:
            str: Enzyme name
        """

    def setEnzyme(self, enzyme: str) -> None:
        """
        Set enzyme name.

        Args:
            enzyme (str): Enzyme name
        """

    def getMissedCleavages(self) -> int:
        """
        Get allowed missed cleavages.

        Returns:
            int: Number of missed cleavages
        """

    def setMissedCleavages(self, missed: int) -> None:
        """
        Set allowed missed cleavages.

        Args:
            missed (int): Number of missed cleavages
        """

    def getPrecursorMassTolerance(self) -> float:
        """
        Get precursor mass tolerance.

        Returns:
            float: Mass tolerance value
        """

    def setPrecursorMassTolerance(self, tolerance: float) -> None:
        """
        Set precursor mass tolerance.

        Args:
            tolerance (float): Mass tolerance value
        """

    def getFragmentMassTolerance(self) -> float:
        """
        Get fragment mass tolerance.

        Returns:
            float: Mass tolerance value
        """

    def setFragmentMassTolerance(self, tolerance: float) -> None:
        """
        Set fragment mass tolerance.

        Args:
            tolerance (float): Mass tolerance value
        """

    def getVariableModifications(self) -> list[str]:
        """
        Get variable modifications.

        Returns:
            list[str]: Variable modification names
        """

    def setVariableModifications(self, mods: list[str]) -> None:
        """
        Set variable modifications.

        Args:
            mods (list[str]): Variable modification names
        """

    def getFixedModifications(self) -> list[str]:
        """
        Get fixed modifications.

        Returns:
            list[str]: Fixed modification names
        """

    def setFixedModifications(self, mods: list[str]) -> None:
        """
        Set fixed modifications.

        Args:
            mods (list[str]): Fixed modification names
        """
```

## Usage Examples

### Peptide Sequence Analysis

```python
import pyopenms

# Create peptide sequences
peptide = pyopenms.AASequence.fromString("PEPTIDE")
modified_peptide = pyopenms.AASequence.fromString("PEPTIDEM(Oxidation)K")

# Get sequence properties
print(f"Sequence: {peptide.toString()}")
print(f"Length: {peptide.size()}")
print(f"Monoisotopic weight: {peptide.getMonoWeight():.4f} Da")
print(f"Average weight: {peptide.getAverageWeight():.4f} Da")
print(f"Formula: {peptide.getFormula().toString()}")

# Check modifications
print(f"Is modified: {modified_peptide.isModified()}")
print(f"Modified sequence: {modified_peptide.toString()}")
print(f"Unmodified sequence: {modified_peptide.toUnmodifiedString()}")

# Access individual residues (getResidue takes a 0-based position)
for i in range(peptide.size()):
    residue = peptide.getResidue(i)
    print(f"Position {i}: {residue.getOneLetterCode()} ({residue.getMonoWeight():.4f} Da)")
```

### Working with Modifications

```python
import pyopenms

# Access modifications database
mod_db = pyopenms.ModificationsDB.getInstance()

# Get specific modification
oxidation = mod_db.getModification("Oxidation")
print(f"Oxidation: {oxidation.getFullName()}")
print(f"Mass delta: {oxidation.getMonoMass():.6f} Da")
print(f"Target residue: {oxidation.getOrigin()}")

# Get all available modifications for methionine
mods = mod_db.getModifications("M", pyopenms.ResidueModification.ANYWHERE)
print(f"Available modifications for methionine: {len(mods)}")
for mod in mods:
    print(f"  {mod.getName()}: {mod.getMonoMass():+.6f} Da")

# Create modified sequence programmatically
seq = pyopenms.AASequence.fromString("METHIONINE")
seq.setModification(0, "Oxidation")  # Oxidize first methionine
print(f"Modified sequence: {seq.toString()}")
```

### Processing Identification Results

```python
import pyopenms

# Load identification results
protein_ids = []
peptide_ids = []
pyopenms.IdXMLFile().load("search_results.idXML", protein_ids, peptide_ids)

print(f"Loaded {len(protein_ids)} protein ID runs")
print(f"Loaded {len(peptide_ids)} peptide identifications")

# Process peptide identifications
high_confidence_hits = []
for pep_id in peptide_ids:
    hits = pep_id.getHits()
    if hits:
        # Assumes the hits are already ordered best-first in the loaded file.
        best_hit = hits[0]
        # Assumes an E-value-like score where lower is better
        # (check pep_id.isHigherScoreBetter() for other score types).
        if best_hit.getScore() < 0.01:  # E-value < 0.01
            high_confidence_hits.append({
                'sequence': best_hit.getSequence().toString(),
                'score': best_hit.getScore(),
                'charge': best_hit.getCharge(),
                'rt': pep_id.getRT(),
                'mz': pep_id.getMZ()
            })

print(f"High confidence hits: {len(high_confidence_hits)}")

# Show top hits
for i, hit in enumerate(high_confidence_hits[:10]):
    print(f"{i+1}. {hit['sequence']} (score: {hit['score']:.2e}, charge: {hit['charge']})")
```

### Protein Analysis

```python
import pyopenms

# Process protein identifications
protein_ids = []
peptide_ids = []
pyopenms.IdXMLFile().load("search_results.idXML", protein_ids, peptide_ids)

if protein_ids:
    prot_id = protein_ids[0]  # First search run
    protein_hits = prot_id.getHits()

    print(f"Search engine: {prot_id.getSearchEngine()}")
    print(f"Database: {prot_id.getSearchParameters().getDatabase()}")
    print(f"Found {len(protein_hits)} protein hits")

    # Sort by score and display top proteins
    # (descending order: assumes a higher protein score is better)
    protein_hits.sort(key=lambda x: x.getScore(), reverse=True)

    for i, hit in enumerate(protein_hits[:10]):
        print(f"{i+1}. {hit.getAccession()}")
        print(f"   Score: {hit.getScore():.2f}")
        print(f"   Description: {hit.getDescription()}")
        print(f"   Coverage: {hit.getCoverage():.1f}%")
        print()
```

### Export to Pandas DataFrame

```python
import pyopenms

# Load identifications
protein_ids = []
peptide_ids = []
pyopenms.IdXMLFile().load("search_results.idXML", protein_ids, peptide_ids)

# Convert to DataFrame
df = pyopenms.peptide_identifications_to_df(peptide_ids)

print("Identification DataFrame:")
print(df.columns.tolist())
print(df.head())

# Basic statistics
print(f"\nTotal identifications: {len(df)}")
# NOTE(review): presumably the 'id' column identifies the peptide sequence;
# verify against the column list printed above.
print(f"Unique sequences: {df['id'].nunique()}")
print("Score distribution:")
print(df['score'].describe())

# Filter high confidence identifications
# (threshold assumes an E-value-like score where lower is better)
high_conf = df[df['score'] < 0.01]
print(f"High confidence hits: {len(high_conf)}")
```