End-to-end Optical Music Recognition (OMR) system for transcribing musical notation from images into structured MusicXML format.
—
Complete end-to-end optical music recognition pipeline that handles the full workflow from image input to MusicXML output. The pipeline orchestrates neural network inference, feature extraction, musical analysis, and structured document generation.
The main extraction function that coordinates the entire OMR pipeline.
def extract(args: Namespace) -> str:
    """
    Main extraction pipeline function that processes a music sheet image and generates MusicXML.

    Parameters:
    - args.img_path (str): Path to the input image file
    - args.output_path (str): Directory path for output files
    - args.use_tf (bool): Use TensorFlow instead of ONNX runtime for inference
    - args.save_cache (bool): Save model predictions to disk for reuse
    - args.without_deskew (bool): Skip the deskewing step for aligned images

    Returns:
        str: Full path to the generated MusicXML file

    Raises:
        FileNotFoundError: If the input image file doesn't exist
        StafflineException: If staffline detection fails
    """
The CLI entry point that provides the oemer command.
def main() -> None:
    """
    CLI entry point for the oemer command.

    Parses command line arguments and executes the extraction pipeline.
    Downloads model checkpoints automatically if not present.
    """
def get_parser() -> ArgumentParser:
    """
    Get the command line argument parser.

    Returns:
        ArgumentParser: Configured parser for oemer CLI options
    """
Generate predictions using the two-stage neural network architecture.
def generate_pred(img_path: str, use_tf: bool = False) -> Tuple[ndarray, ndarray, ndarray, ndarray, ndarray]:
    """
    Generate neural network predictions for all musical elements.

    Runs two U-Net models:
    1. Staff vs. symbols segmentation
    2. Detailed symbol classification

    Parameters:
    - img_path (str): Path to the input image
    - use_tf (bool): Use TensorFlow instead of ONNX runtime

    Returns:
        Tuple containing, in this order:
        - staff (ndarray): Staff line predictions
        - symbols (ndarray): General symbol predictions
        - stems_rests (ndarray): Stems and rests predictions
        - notehead (ndarray): Note head predictions
        - clefs_keys (ndarray): Clefs and accidentals predictions
    """
Functions for managing intermediate processing data and model checkpoints.
def clear_data() -> None:
    """
    Clear all registered processing layers.

    Removes all intermediate data from the layer management system
    to free memory and reset state between processing runs.
    """
def download_file(title: str, url: str, save_path: str) -> None:
    """
    Download model checkpoint files with progress tracking.

    Parameters:
    - title (str): Display name for download progress
    - url (str): URL of the file to download
    - save_path (str): Local path to save the downloaded file
    """
def polish_symbols(rgb_black_th: int = 300) -> ndarray:
    """
    Polish symbol predictions by filtering background pixels.

    Parameters:
    - rgb_black_th (int): RGB threshold for black pixel detection

    Returns:
        ndarray: Refined symbol predictions
    """
Functions for registering extracted elements in the layer system.
def register_notehead_bbox(bboxes: List[BBox]) -> ndarray:
    """
    Register notehead bounding boxes in the layer system.

    Parameters:
    - bboxes (List[BBox]): List of notehead bounding boxes

    Returns:
        ndarray: Updated bounding box layer
    """
def register_note_id() -> None:
    """
    Register note IDs in the layer system.

    Creates a mapping layer where each pixel contains the ID
    of the note it belongs to, or -1 for background pixels.
    """
from oemer.ete import extract, clear_data
from argparse import Namespace
import os

# Start from a clean slate: drop any layers registered by an earlier run.
clear_data()

# All pipeline options for this run, spelled out explicitly.
args = Namespace(**{
    'img_path': 'scores/beethoven_symphony.jpg',
    'output_path': './output/',
    'use_tf': False,           # ONNX runtime (faster)
    'save_cache': True,        # keep predictions on disk for reuse
    'without_deskew': False,   # leave deskewing on, e.g. for phone photos
})

try:
    # Image in, MusicXML path out.
    musicxml_path = extract(args)
    print(f"Successfully generated: {musicxml_path}")

    # A teaser image may have been written next to the MusicXML file;
    # mention it only when it is actually there.
    teaser_path = musicxml_path.replace('.musicxml', '_teaser.png')
    teaser_exists = os.path.exists(teaser_path)
    if teaser_exists:
        print(f"Analysis visualization: {teaser_path}")
except FileNotFoundError:
    print(f"Image file not found: {args.img_path}")
except Exception as err:
    # Top-level boundary of the example: report the failure instead of crashing.
    print(f"Processing failed: {err}")
import os
from pathlib import Path
from oemer.ete import extract, clear_data
from argparse import Namespace
def process_directory(input_dir: str, output_dir: str) -> None:
    """
    Process all images in a directory.

    Runs the extraction pipeline on every image file found directly inside
    input_dir (sub-directories are not searched) and writes the results to
    output_dir. A failure on one image is reported and the remaining images
    are still processed.

    Parameters:
    - input_dir (str): Directory containing the sheet music images
    - output_dir (str): Directory for the generated files; it is created,
      together with any missing parent directories, when it does not exist

    Raises:
        FileNotFoundError: If input_dir does not exist
        NotADirectoryError: If input_dir is not a directory
    """
    input_path = Path(input_dir)
    output_path = Path(output_dir)
    # parents=True: a nested output directory must not fail just because an
    # intermediate directory is missing.
    output_path.mkdir(parents=True, exist_ok=True)

    # Find all image files ('.tif' and '.tiff' are the same format).
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff'}
    # Keep regular files only (a directory named "scans.png" is not an image)
    # and sort so the processing order is the same on every run.
    image_files = sorted(
        f for f in input_path.iterdir()
        if f.is_file() and f.suffix.lower() in image_extensions
    )
    print(f"Found {len(image_files)} images to process")

    for i, img_file in enumerate(image_files, 1):
        print(f"Processing {i}/{len(image_files)}: {img_file.name}")
        # Clear data between files to free memory
        clear_data()
        args = Namespace(
            img_path=str(img_file),
            output_path=str(output_path),
            use_tf=False,
            save_cache=True,  # Reuse predictions if processing same image again
            without_deskew=False,
        )
        try:
            musicxml_path = extract(args)
            print(f"✓ Generated: {Path(musicxml_path).name}")
        except Exception as e:
            # Deliberate best-effort: one bad image must not abort the batch.
            print(f"✗ Failed: {e}")
# Example invocation: convert every image in ./sheet_music_images/ and
# write the results to ./musicxml_output/
process_directory('./sheet_music_images/', './musicxml_output/')
from oemer.ete import extract
from argparse import Namespace
# Test with both ONNX and TensorFlow backends
test_image = 'test_score.jpg'
# ONNX runtime (default - faster inference)
args_onnx = Namespace(
img_path=test_image,
output_path='./onnx_output/',
use_tf=False,
save_cache=False,
without_deskew=False
)
# TensorFlow backend (may have different precision)
args_tf = Namespace(
img_path=test_image,
output_path='./tf_output/',
use_tf=True,
save_cache=False,
without_deskew=False
)
print("Processing with ONNX runtime...")
onnx_result = extract(args_onnx)
print("Processing with TensorFlow...")
tf_result = extract(args_tf)
print(f"ONNX result: {onnx_result}")
print(f"TensorFlow result: {tf_result}")The extraction pipeline follows these stages:
Each stage can access intermediate results through the layer management system, enabling modular processing and debugging capabilities.
Install with Tessl CLI
npx tessl i tessl/pypi-oemer