Platform wheels for RDKit - a comprehensive cheminformatics and machine-learning library with Python bindings
89
Calculation of 2D molecular descriptors for characterizing chemical properties. RDKit provides 217+ molecular descriptors covering molecular weight, lipophilicity, topological indices, structural features, and pharmacophore properties for quantitative structure-activity relationship (QSAR) analysis and chemical space exploration.
Fundamental molecular properties including molecular weight, heavy atom count, and basic structural metrics.
def MolWt(mol: Mol) -> float:
"""
Calculate molecular weight.
Parameters:
- mol: Input molecule
Returns:
Molecular weight in Daltons
"""
def ExactMolWt(mol: Mol) -> float:
"""
Calculate exact molecular weight using isotopic masses.
Parameters:
- mol: Input molecule
Returns:
Exact molecular weight in Daltons
"""
def HeavyAtomCount(mol: Mol) -> int:
"""
Count heavy (non-hydrogen) atoms.
Parameters:
- mol: Input molecule
Returns:
Number of heavy atoms
"""
def NumHeteroatoms(mol: Mol) -> int:
"""
Count heteroatoms (non-carbon heavy atoms).
Parameters:
- mol: Input molecule
Returns:
Number of heteroatoms
"""

Descriptors related to molecular lipophilicity and solubility properties.
def MolLogP(mol: Mol) -> float:
"""
Calculate Wildman-Crippen LogP.
Parameters:
- mol: Input molecule
Returns:
Calculated LogP value
"""
def MolMR(mol: Mol) -> float:
"""
Calculate Wildman-Crippen molar refractivity.
Parameters:
- mol: Input molecule
Returns:
Molar refractivity value
"""
def TPSA(mol: Mol) -> float:
"""
Calculate topological polar surface area.
Parameters:
- mol: Input molecule
Returns:
TPSA value in Ų
"""

Descriptors for hydrogen bond donors and acceptors, critical for drug-like property assessment.
def NumHDonors(mol: Mol) -> int:
"""
Count hydrogen bond donors.
Parameters:
- mol: Input molecule
Returns:
Number of hydrogen bond donors
"""
def NumHAcceptors(mol: Mol) -> int:
"""
Count hydrogen bond acceptors.
Parameters:
- mol: Input molecule
Returns:
Number of hydrogen bond acceptors
"""

Descriptors characterizing ring systems and cyclic structures.
def RingCount(mol: Mol) -> int:
"""
Count number of rings.
Parameters:
- mol: Input molecule
Returns:
Total number of rings
"""
def NumAromaticRings(mol: Mol) -> int:
"""
Count aromatic rings.
Parameters:
- mol: Input molecule
Returns:
Number of aromatic rings
"""
def NumAliphaticRings(mol: Mol) -> int:
"""
Count aliphatic rings.
Parameters:
- mol: Input molecule
Returns:
Number of aliphatic rings
"""
def NumSaturatedRings(mol: Mol) -> int:
"""
Count saturated rings.
Parameters:
- mol: Input molecule
Returns:
Number of saturated rings
"""

Graph-based descriptors characterizing molecular topology and connectivity.
def BertzCT(mol: Mol) -> float:
"""
Calculate Bertz topological complexity.
Parameters:
- mol: Input molecule
Returns:
Bertz complexity index
"""
def Kappa1(mol: Mol) -> float:
"""
Calculate first kappa shape index.
Parameters:
- mol: Input molecule
Returns:
Kappa1 shape index
"""
def Kappa2(mol: Mol) -> float:
"""
Calculate second kappa shape index.
Parameters:
- mol: Input molecule
Returns:
Kappa2 shape index
"""
def Kappa3(mol: Mol) -> float:
"""
Calculate third kappa shape index.
Parameters:
- mol: Input molecule
Returns:
Kappa3 shape index
"""

Functions for calculating multiple descriptors simultaneously.
def CalcMolDescriptors(mol: Mol) -> dict:
"""
Calculate all available molecular descriptors.
Parameters:
- mol: Input molecule
Returns:
Dictionary mapping descriptor names to values
"""
_descList: list
"""
List of all available descriptor names and functions.
Contains 217 descriptor definitions in version 2024.9.6.
Each entry is a tuple of (name, function).
"""

Descriptors based on molecular fragments and substructures.
def fr_Al_COO(mol: Mol) -> int:
"""
Count aliphatic carboxylic acid fragments.
Parameters:
- mol: Input molecule
Returns:
Number of aliphatic carboxylic acid groups
"""
def fr_ArN(mol: Mol) -> int:
"""
Count N functional groups attached to aromatic rings.
Parameters:
- mol: Input molecule
Returns:
Number of N functional groups attached to aromatics
"""
def fr_benzene(mol: Mol) -> int:
"""
Count benzene rings.
Parameters:
- mol: Input molecule
Returns:
Number of benzene rings
"""

from rdkit import Chem
from rdkit.Chem import Descriptors
# Calculate basic descriptors
mol = Chem.MolFromSmiles('CCO') # Ethanol
mw = Descriptors.MolWt(mol)
logp = Descriptors.MolLogP(mol)
tpsa = Descriptors.TPSA(mol)
hbd = Descriptors.NumHDonors(mol)
hba = Descriptors.NumHAcceptors(mol)
print(f"Molecular Weight: {mw:.2f}")
print(f"LogP: {logp:.2f}")
print(f"TPSA: {tpsa:.2f}")
print(f"H-bond Donors: {hbd}")
print(f"H-bond Acceptors: {hba}")

from rdkit import Chem
from rdkit.Chem import Descriptors
# Calculate all available descriptors
mol = Chem.MolFromSmiles('CCO')
all_descriptors = Descriptors.CalcMolDescriptors(mol)
print(f"Total descriptors calculated: {len(all_descriptors)}")
for name, value in list(all_descriptors.items())[:10]: # Show first 10
    print(f"{name}: {value}")

from rdkit import Chem
from rdkit.Chem import Descriptors
def assess_drug_likeness(smiles):
    """Check a SMILES string against Lipinski's Rule of Five.

    Returns None when Chem.MolFromSmiles yields None for the input.
    Otherwise returns a dict holding the four computed properties
    ('MW', 'LogP', 'HBD', 'HBA'), the number of thresholds exceeded
    ('Violations'), and 'Drug-like', which is True when at most one
    threshold is exceeded.
    """
    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        return None

    # Properties are computed in the same order as they appear in the result.
    report = {
        'MW': Descriptors.MolWt(molecule),
        'LogP': Descriptors.MolLogP(molecule),
        'HBD': Descriptors.NumHDonors(molecule),
        'HBA': Descriptors.NumHAcceptors(molecule),
    }

    # Upper limits per property; a value strictly above its limit is a violation.
    limits = {'MW': 500, 'LogP': 5, 'HBD': 5, 'HBA': 10}
    exceeded = sum(1 for key, limit in limits.items() if report[key] > limit)

    report['Violations'] = exceeded
    report['Drug-like'] = exceeded <= 1
    return report
# Test with aspirin
result = assess_drug_likeness('CC(=O)OC1=CC=CC=C1C(=O)O')
print(result)

from rdkit.Chem import Descriptors
# Access all available descriptors
print(f"Total descriptors available: {len(Descriptors._descList)}")
# Show first 10 descriptor names
for i, (name, func) in enumerate(Descriptors._descList[:10]):
    print(f"{i+1}: {name}")

Install with Tessl CLI
npx tessl i tessl/pypi-rdkit

evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10