End-to-end Optical Music Recognition (OMR) system for transcribing musical notation from images into structured MusicXML format.
—
Extraction and analysis of musical noteheads from neural network predictions. This module identifies individual noteheads, determines their properties, and associates them with musical context.
Extract noteheads from neural network predictions and create structured representations.
def extract() -> List[NoteHead]:
"""
Extract noteheads from neural network predictions.
Processes notehead predictions from the layer system, identifies
individual notehead regions, and creates NoteHead instances with
properties like position, pitch, and rhythm type.
Returns:
List[NoteHead]: List of detected and analyzed noteheads
Raises:
KeyError: If required prediction layers are not available
"""
Functions for refining notehead detection and classification.
def get_predict_class(region: ndarray) -> NoteType:
"""
Predict the note type (rhythm) from a notehead region.
Uses trained sklearn models to classify noteheads as whole, half,
quarter, eighth, etc. based on visual characteristics.
Parameters:
- region (ndarray): Image region containing the notehead
Returns:
NoteType: Predicted note type/rhythm classification
"""
def morph_notehead(note: NoteHead, size: Tuple[int, int] = (11, 8)) -> ndarray:
"""
Apply morphological operations to clean notehead region.
Parameters:
- note (NoteHead): The notehead to process
- size (Tuple[int, int]): Kernel size for morphological operations
Returns:
ndarray: Cleaned notehead region
"""
Functions for analyzing notehead properties and context.
def get_symbol_regression(note: NoteHead, staff: Staff) -> int:
"""
Determine the pitch position of a notehead relative to staff lines.
Calculates the position of the notehead center relative to the
five staff lines to determine pitch.
Parameters:
- note (NoteHead): The notehead to analyze
- staff (Staff): The staff containing this notehead
Returns:
int: Staff line position (negative for below bottom line,
0-4 for on staff lines, positive for above top line)
"""
def check_stem_up_down(note: NoteHead) -> Optional[bool]:
"""
Determine stem direction for a notehead.
Analyzes the surrounding region to detect stem presence
and direction (up or down).
Parameters:
- note (NoteHead): The notehead to analyze
Returns:
Optional[bool]: True for stem up, False for stem down,
None if no stem detected
"""
def check_dots(note: NoteHead) -> bool:
"""
Check if a notehead has augmentation dots.
Searches the region to the right of the notehead for
augmentation dots that extend the note's duration.
Parameters:
- note (NoteHead): The notehead to check
Returns:
bool: True if dots are detected, False otherwise
"""
Complete representation of a musical notehead with all associated properties.
class NoteHead:
"""
Represents a musical notehead with rhythm, pitch, and context information.
Attributes:
- points (List[Tuple[int, int]]): Pixel coordinates belonging to this notehead
- pitch (Optional[int]): MIDI pitch number (None if not determined)
- has_dot (bool): Whether this note has augmentation dots
- bbox (BBox): Bounding box coordinates (x1, y1, x2, y2)
- stem_up (Optional[bool]): Stem direction (True=up, False=down, None=no stem)
- stem_right (Optional[bool]): Whether stem is on right side of notehead
- track (Optional[int]): Track number for multi-staff systems
- group (Optional[int]): Group number for staff groupings
- staff_line_pos (int): Position relative to staff lines
- invalid (bool): Whether this detection is likely a false positive
- id (Optional[int]): Unique identifier within the document
- note_group_id (Optional[int]): ID of the note group this belongs to
- sfn (Optional[Any]): Associated sharp/flat/natural accidental
- label (Optional[NoteType]): Rhythm classification (whole, half, quarter, etc.); None when the notehead is marked invalid
"""
def add_point(self, x: int, y: int) -> None:
"""
Add a pixel coordinate to this notehead.
Parameters:
- x (int): X coordinate
- y (int): Y coordinate
"""
def force_set_label(self, label: NoteType) -> None:
"""
Force set the note type label, overriding any existing label.
Parameters:
- label (NoteType): The note type to assign
Raises:
AssertionError: If label is not a NoteType instance
"""
@property
def label(self) -> Optional[NoteType]:
"""
Get the note type label.
Returns None if the notehead is marked as invalid.
Returns:
Optional[NoteType]: The note type or None if invalid
"""
@label.setter
def label(self, label: NoteType) -> None:
"""
Set the note type label.
Will not overwrite existing labels unless using force_set_label.
Parameters:
- label (NoteType): The note type to assign
"""
Classification of note types by rhythm duration.
class NoteType(enum.Enum):
"""
Enumeration of note types by rhythmic duration.
"""
WHOLE = 0 # Whole note (4 beats)
HALF = 1 # Half note (2 beats)
QUARTER = 2 # Quarter note (1 beat)
EIGHTH = 3 # Eighth note (1/2 beat)
SIXTEENTH = 4 # Sixteenth note (1/4 beat)
THIRTY_SECOND = 5 # Thirty-second note (1/8 beat)
SIXTY_FOURTH = 6 # Sixty-fourth note (1/16 beat)
TRIPLET = 7 # Triplet note (special timing)
OTHERS = 8 # Other/unclassified note types
HALF_OR_WHOLE = 9 # Intermediate classification state
from oemer.notehead_extraction import extract
from oemer.layers import register_layer, get_layer
import numpy as np

# Precondition: the neural network predictions are already registered in the
# layer system (this would normally be done by the inference module).

# Guard only extract(): per its documented contract it raises KeyError when a
# required prediction layer is missing. Keeping the reporting code out of the
# try body means an unrelated KeyError can never be misreported as a missing
# layer.
try:
    noteheads = extract()
except KeyError as e:
    print(f"Missing required layer: {e}")
else:
    print(f"Extracted {len(noteheads)} noteheads")

    # Report the properties of each detected notehead.
    for i, note in enumerate(noteheads):
        print(f"\nNotehead {i}:")
        print(f" Position: {note.bbox}")
        print(f" Pitch: {note.pitch}")
        print(f" Type: {note.label}")
        print(f" Has dot: {note.has_dot}")
        print(f" Stem up: {note.stem_up}")
        print(f" Track: {note.track}")
        print(f" Valid: {not note.invalid}")

from oemer.notehead_extraction import extract, get_symbol_regression, check_dots
from oemer.staffline_extraction import extract as staff_extract
from collections import defaultdict

# Extract staffs and noteheads
staffs, _ = staff_extract()
noteheads = extract()

# Analyze noteheads in context of staffs
valid_notes = []
for note in noteheads:
    if note.invalid:
        continue

    # Measure from the notehead's vertical centre. bbox is (x1, y1, x2, y2),
    # so bbox[1] on its own is the top edge and biases the match upwards.
    note_y = (note.bbox[1] + note.bbox[3]) / 2

    # Nearest staff by vertical distance (first one wins on ties);
    # None when no staffs were detected at all.
    closest_staff = min(
        staffs,
        key=lambda staff: abs(note_y - staff.y_center),
        default=None,
    )
    if closest_staff is None:
        continue

    # Get pitch position relative to staff
    note.staff_line_pos = get_symbol_regression(note, closest_staff)

    # Check for augmentation dots
    note.has_dot = check_dots(note)

    valid_notes.append(note)

print(f"Valid noteheads: {len(valid_notes)}")

# Group by note type
by_type = defaultdict(list)
for note in valid_notes:
    by_type[note.label].append(note)

for note_type, notes in by_type.items():
    print(f"{note_type.name}: {len(notes)} notes")

from oemer.notehead_extraction import extract, get_predict_class, morph_notehead
import cv2
# Extract noteheads
noteheads = extract()
# Reclassify noteheads with custom processing
for note in noteheads:
if note.invalid:
continue
# Get the image region for this notehead
image = get_layer("original_image")
x1, y1, x2, y2 = note.bbox
region = image[y1:y2, x1:x2]
# Apply morphological cleaning
cleaned_region = morph_notehead(note, size=(11, 8))
# Reclassify the note type
predicted_type = get_predict_class(cleaned_region)
# Update the note if classification differs significantly
if note.label != predicted_type:
print(f"Note {note.id}: {note.label} -> {predicted_type}")
note.force_set_label(predicted_type)The notehead extraction module integrates with the main OMR pipeline:
- notehead_pred layer
- staffs layer
- original_image layer
- notes layer
- note_id layer

The extracted noteheads are then used by:
This modular design allows for independent testing and refinement of each processing stage while maintaining integration with the overall pipeline.
Install with Tessl CLI
npx tessl i tessl/pypi-oemer