Platform wheels for RDKit - a comprehensive cheminformatics and machine-learning library with Python bindings
89
Pharmacophore and chemical feature detection for identifying functional groups, binding sites, and molecular patterns. RDKit's feature system enables systematic analysis of chemical functionality through customizable feature definitions and automated pattern recognition algorithms essential for drug discovery and chemical analysis.
Create feature detection factories from feature definition files for systematic chemical pattern recognition.
def BuildFeatureFactory(fdefName: str) -> "FeatureFactory":
    """
    Create a feature factory from a feature definition file.

    Parameters:
    - fdefName: Path to feature definition file (.fdef format)

    Returns:
    FeatureFactory object for pattern detection
    """
    # Signature stub for documentation purposes: no implementation is shown here.
def BuildFeatureFactoryFromString(fdefString: str) -> "FeatureFactory":
    """
    Create a feature factory from a feature definition string.

    Parameters:
    - fdefString: Feature definition string in .fdef format

    Returns:
    FeatureFactory object for pattern detection
    """
    # Signature stub for documentation purposes: no implementation is shown here.


# Identify chemical features and functional groups in molecular structures.
class FeatureFactory:
    """
    Factory for detecting chemical features in molecules.
    """

    # The Mol annotation is quoted because no import for it is visible here;
    # an unquoted name would raise NameError when the class body executes.
    def GetFeaturesForMol(self, mol: "Mol", confId: int = -1) -> list:
        """
        Find all features in a molecule.

        Parameters:
        - mol: Input molecule
        - confId: Conformer ID to use (default -1)

        Returns:
        List of ChemicalFeature objects
        """

    def GetFeatureDefs(self) -> list:
        """
        Get all feature definitions in this factory.

        Returns:
        List of FeatureDef objects
        """

    def GetFeatureFamilies(self) -> list:
        """
        Get all feature family names.

        Returns:
        List of feature family name strings
        """


# Represent detected chemical features with spatial and chemical information.
class ChemicalFeature:
    """
    Represents a detected chemical feature in a molecule.
    """

    def GetFamily(self) -> str:
        """
        Get the feature family name.

        Returns:
        Feature family string (e.g., 'Donor', 'Acceptor', 'Aromatic')
        """

    def GetType(self) -> str:
        """
        Get the specific feature type.

        Returns:
        Feature type string
        """

    def GetAtomIds(self) -> tuple:
        """
        Get atom indices involved in this feature.

        Returns:
        Tuple of atom indices
        """

    def GetPos(self) -> tuple:
        """
        Get the 3D position of the feature.

        Returns:
        (x, y, z) coordinate tuple
        """

    def GetId(self) -> int:
        """
        Get the feature ID.

        Returns:
        Feature ID integer
        """


# Define patterns and rules for chemical feature detection.
class FeatureDef:
    """
    Definition of a chemical feature pattern.
    """

    def GetFamily(self) -> str:
        """
        Get the feature family name.

        Returns:
        Feature family string
        """

    def GetType(self) -> str:
        """
        Get the specific feature type name.

        Returns:
        Feature type string
        """

    def GetSmarts(self) -> str:
        """
        Get the SMARTS pattern for this feature.

        Returns:
        SMARTS pattern string
        """

    def GetWeight(self) -> float:
        """
        Get the feature weight/importance.

        Returns:
        Weight value
        """


# Analyze spatial relationships between chemical features for pharmacophore modeling.
def GetDistanceMatrix(features: list, confId: int = -1) -> list:
    """
    Calculate distance matrix between features.

    Parameters:
    - features: List of ChemicalFeature objects
    - confId: Conformer ID to use (default -1)

    Returns:
    2D list representing distance matrix
    """
    # Signature stub for documentation purposes: no implementation is shown here.
# The Mol annotation is quoted because no import for it is visible here;
# an unquoted name would raise NameError when the def statement executes.
def Get3DDistanceMatrix(mol: "Mol", confId: int = -1, useAtomWts: bool = False) -> list:
    """
    Calculate 3D distance matrix for all atoms in a molecule.

    Parameters:
    - mol: Input molecule with 3D coordinates
    - confId: Conformer ID to use (default -1)
    - useAtomWts: Weight distances by atomic weights (default False)

    Returns:
    2D list representing distance matrix
    """
    # Signature stub for documentation purposes: no implementation is shown here.


# Access to RDKit's standard feature definition resources.
def GetFeatureDefFile() -> str:
    """
    Get path to the default BaseFeatures.fdef file.

    Returns:
    Path to BaseFeatures.fdef in RDKit data directory
    """
    # Signature stub for documentation purposes: no implementation is shown here.


# RDKit's BaseFeatures.fdef defines several standard feature families:
import os
from rdkit import Chem, RDConfig
from rdkit.Chem import ChemicalFeatures

# Load the default feature factory
fdefName = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
factory = ChemicalFeatures.BuildFeatureFactory(fdefName)

# Create a molecule and detect features
mol = Chem.MolFromSmiles('OCc1ccccc1CN')  # 2-(aminomethyl)benzyl alcohol
features = factory.GetFeaturesForMol(mol)

# Report each feature's family together with the atoms that make it up
print(f"Found {len(features)} features:")
for feat in features:
    print(f"- {feat.GetFamily()}: atoms {feat.GetAtomIds()}")

import os
from rdkit import Chem, RDConfig
from rdkit.Chem import ChemicalFeatures, AllChem

# Prepare molecule with 3D coordinates.
# Explicit hydrogens are added first: embedding and force-field optimization
# give poor geometries on a hydrogen-suppressed graph.
mol = Chem.AddHs(Chem.MolFromSmiles('OCc1ccccc1CN'))
# EmbedMolecule returns the new conformer ID, or -1 when embedding fails;
# stop here rather than optimizing/measuring a molecule with no conformer.
if AllChem.EmbedMolecule(mol) < 0:
    raise RuntimeError("3D embedding failed; no conformer was generated")
AllChem.MMFFOptimizeMolecule(mol)

# Detect features
fdefName = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
factory = ChemicalFeatures.BuildFeatureFactory(fdefName)
features = factory.GetFeaturesForMol(mol)

# Analyze feature positions and distances
donor_features = [f for f in features if f.GetFamily() == 'Donor']
acceptor_features = [f for f in features if f.GetFamily() == 'Acceptor']
print(f"Donors: {len(donor_features)}")
print(f"Acceptors: {len(acceptor_features)}")

# Calculate distances between donors and acceptors
for i, donor in enumerate(donor_features):
    pos1 = donor.GetPos()  # invariant for the inner loop, so fetch it once
    for j, acceptor in enumerate(acceptor_features):
        pos2 = acceptor.GetPos()
        distance = ((pos1[0]-pos2[0])**2 + (pos1[1]-pos2[1])**2 + (pos1[2]-pos2[2])**2)**0.5
        # A Euclidean distance is a length, so the unit is Å (not Å squared)
        print(f"Donor {i} to Acceptor {j}: {distance:.2f} Å")

from rdkit.Chem import ChemicalFeatures
# Chem is needed for MolFromSmiles below; this example did not import it.
from rdkit import Chem

# Define custom feature patterns
custom_fdef = """
DefineFeature HalogenBond [#9,#17,#35,#53;X1]
Family HalogenBond
Weights 1.0
EndFeature
DefineFeature Nitrile [C]#[N]
Family Nitrile
Weights 1.0,1.0
EndFeature
"""

# Create factory from custom definitions
factory = ChemicalFeatures.BuildFeatureFactoryFromString(custom_fdef)

# Test on molecules containing these features
mol1 = Chem.MolFromSmiles('CCF')  # Contains fluorine
mol2 = Chem.MolFromSmiles('CC#N')  # Contains nitrile
features1 = factory.GetFeaturesForMol(mol1)
features2 = factory.GetFeaturesForMol(mol2)
print(f"Molecule 1 features: {[f.GetFamily() for f in features1]}")
print(f"Molecule 2 features: {[f.GetFamily() for f in features2]}")

import os
from rdkit import RDConfig
from rdkit.Chem import ChemicalFeatures

# Load factory and inspect available features
fdefName = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
factory = ChemicalFeatures.BuildFeatureFactory(fdefName)

# Get all feature families
families = factory.GetFeatureFamilies()
print(f"Available feature families: {families}")

# Get feature definitions
feature_defs = factory.GetFeatureDefs()
print(f"\nTotal feature definitions: {len(feature_defs)}")
for fdef in feature_defs[:5]:  # Show first 5
    print(f"- {fdef.GetFamily()}/{fdef.GetType()}: {fdef.GetSmarts()}")

import os
from collections import Counter

from rdkit import Chem, RDConfig
from rdkit.Chem import ChemicalFeatures, Descriptors

# Combine feature detection with descriptor calculation
mol = Chem.MolFromSmiles('OCc1ccccc1CN')

# Calculate basic descriptors
mw = Descriptors.MolWt(mol)
logp = Descriptors.MolLogP(mol)
hbd = Descriptors.NumHDonors(mol)
hba = Descriptors.NumHAcceptors(mol)

# Detect detailed features
fdefName = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
factory = ChemicalFeatures.BuildFeatureFactory(fdefName)
features = factory.GetFeaturesForMol(mol)

# Summarize results
print(f"Molecular Weight: {mw:.2f}")
print(f"LogP: {logp:.2f}")
print(f"HB Donors (Descriptors): {hbd}")
print(f"HB Acceptors (Descriptors): {hba}")
print(f"Total Features Detected: {len(features)}")

# Count features by family; Counter keeps first-seen order, so the report
# lists families in the order they were detected.
feature_counts = Counter(feat.GetFamily() for feat in features)
for family, count in feature_counts.items():
    print(f"{family}: {count}")

# Install with Tessl CLI
npx tessl i tessl/pypi-rdkit
evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10