Create delightful software with Jupyter Notebooks
—
Clean notebook metadata, outputs, and configure git integration for notebooks. The cleaning system removes superfluous metadata, clears outputs, and makes notebooks git-friendly by handling merge conflicts and cell IDs.
Clean notebooks and remove unnecessary metadata for version control.
def nbdev_clean(fname: str = None, clear_all: bool = False,
disp: bool = False, read_input_dir: str = None,
write_input_dir: str = None):
"""
Clean notebooks in project.
Args:
fname: Specific notebook file or glob pattern to clean
clear_all: Remove all metadata and outputs (overrides settings)
disp: Display cleaning progress and changes
read_input_dir: Directory to read notebooks from
write_input_dir: Directory to write cleaned notebooks to
Removes unnecessary metadata, cell outputs, and execution counts
from notebooks to make them git-friendly and reduce diff noise.
"""
def clean_nb(nb):
"""
Clean a single notebook object.
Args:
nb: Notebook object to clean
Returns:
Cleaned notebook with metadata and outputs removed
according to configuration settings.
"""

Usage Examples:
from nbdev.clean import nbdev_clean, clean_nb
from execnb.nbio import read_nb, write_nb
# Clean all notebooks in project
nbdev_clean()
# Clean specific notebook file
nbdev_clean('notebooks/01_core.ipynb')
# Clean with verbose output
nbdev_clean(disp=True)
# Clean and remove all metadata/outputs
nbdev_clean(clear_all=True)
# Clean notebook object directly
nb = read_nb('example.ipynb')
cleaned_nb = clean_nb(nb)
write_nb(cleaned_nb, 'cleaned_example.ipynb')

Trust notebooks to enable execution of JavaScript and other dynamic content.
def nbdev_trust(fname: str = None, force_all: bool = False):
"""
Trust notebooks matching fname pattern.
Args:
fname: Notebook name or glob pattern to trust
force_all: Trust notebooks even if they haven't changed
Trusts notebooks for execution by signing them with Jupyter's
trust system. Only processes notebooks that have changed since
last trust operation unless force_all is True.
"""

Usage Examples:
from nbdev.clean import nbdev_trust
# Trust all notebooks in project
nbdev_trust()
# Trust specific notebook
nbdev_trust('notebooks/analysis.ipynb')
# Force trust all notebooks regardless of change status
nbdev_trust(force_all=True)
# Trust notebooks matching pattern
nbdev_trust('notebooks/experimental/*.ipynb')

Install git hooks for automatic notebook cleaning and processing.
def nbdev_install_hooks():
"""
Install git hooks for notebook processing.
Installs pre-commit and other git hooks that automatically:
- Clean notebooks before commits
- Handle notebook merge conflicts
- Process notebook metadata
- Ensure consistent notebook formatting
"""

Usage Example:
from nbdev.clean import nbdev_install_hooks
# Install git hooks for automatic cleaning
nbdev_install_hooks()
# Now git commits will automatically clean notebooks

Configure Jupyter to work seamlessly with nbdev cleaning.
def clean_jupyter():
"""
Clean Jupyter-specific metadata and configuration.
Removes Jupyter-specific metadata that can cause merge conflicts
or unnecessary version control noise, including:
- Kernel specifications that may vary between environments
- Execution timing information
- Widget state that doesn't serialize well
"""

Process and write notebooks with cleaning applied.
def process_write(nb, fname: str):
"""
Process and write notebook with cleaning.
Args:
nb: Notebook object to process
fname: Output filename for processed notebook
Applies all configured cleaning operations and writes
the cleaned notebook to the specified file.
"""

Control cleaning behavior through settings.ini configuration:
# Remove cell IDs from notebooks
clean_ids = True
# Remove all metadata and outputs
clear_all = False
# Preserve specific metadata keys
allowed_metadata_keys = language_info,kernelspec
allowed_cell_metadata_keys = tags,id
# Enable Jupyter git hooks
jupyter_hooks = True

clean_ids=True
allowed_metadata_keys

Before Cleaning:
{
"cell_type": "code",
"execution_count": 42,
"id": "a1b2c3d4-e5f6-7890",
"metadata": {
"scrolled": true,
"execution": {"iopub.execute_input": "2023-01-01T12:00:00.000Z"}
},
"outputs": [{"output_type": "stream", "text": "Hello World"}],
"source": "print('Hello World')"
}

After Cleaning:
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "print('Hello World')"
}

When nbdev_install_hooks() is run, git pre-commit hooks automatically:
# Before each commit, hooks run:
nbdev_clean --clear_all # Clean all notebooks
# Then proceed with commit

Cleaning helps resolve notebook merge conflicts by:
from nbdev.clean import nbdev_clean
# Clean only specific types of notebooks
nbdev_clean('notebooks/experiments/*.ipynb')
# Clean with custom settings
nbdev_clean(clear_all=True, disp=True)

from nbdev.clean import nbdev_clean, nbdev_trust
from nbdev.export import nb_export
# Complete workflow
def prepare_notebooks():
    """Prepare notebooks for version control and export."""
    # Clean notebooks
    nbdev_clean(disp=True)
    # Trust for execution
    nbdev_trust()
    # Export to modules
    nb_export()
    print("Notebooks prepared successfully")
prepare_notebooks()

from nbdev.clean import clean_nb, process_write
from execnb.nbio import read_nb
from pathlib import Path
def custom_clean_workflow(nb_path):
    """Custom cleaning with additional processing."""
    nb = read_nb(nb_path)
    # Apply standard cleaning
    cleaned_nb = clean_nb(nb)
    # Custom processing
    # Remove specific metadata, add custom fields, etc.
    # Write back
    process_write(cleaned_nb, nb_path)
# Apply to all notebooks
for nb_file in Path('notebooks').glob('*.ipynb'):
    custom_clean_workflow(nb_file)

# Add to your development workflow
nbdev_clean # Before committing changes
nbdev_trust # After pulling changes

nbdev_install_hooks()

# In GitHub Actions
- name: Clean notebooks
  run: nbdev_clean --clear_all
- name: Verify notebooks are clean
  run: |
    nbdev_clean
    git diff --exit-code # Fail if notebooks weren't already clean

Install with Tessl CLI
npx tessl i tessl/pypi-nbdev