End-to-end Optical Music Recognition (OMR) system for transcribing musical notation from images into structured MusicXML format.
—
Advanced grouping of individual noteheads into musical chords and rhythm pattern recognition. This module combines related noteheads based on stems, beams, and spatial proximity to create meaningful musical structures.
Group individual noteheads into chord structures based on stems and beams.
def extract() -> Tuple[List[NoteGroup], ndarray]:
    """
    Group noteheads by stems and beams into chord groups.

    Analyzes the spatial relationships between noteheads and identifies
    groups that should be played simultaneously (chords) or in sequence
    (beamed note groups).

    Returns:
        Tuple containing:
            - List[NoteGroup]: List of detected note groups
            - ndarray: Group mapping array showing which group each pixel belongs to

    Raises:
        KeyError: If required layers (notes, stems_rests_pred) are not available
    """
def group_noteheads() -> Tuple[Dict[int, List[int]], ndarray]:
    """
    Create initial groupings of noteheads based on spatial proximity.

    Returns:
        Tuple containing:
            - Dict[int, List[int]]: Mapping of group IDs to note IDs
            - ndarray: Group mapping visualization
    """
def get_possible_nearby_gid(cur_note: NoteHead, group_map: ndarray, scan_range_ratio: float = 5) -> List[int]:
    """
    Find group IDs near a given notehead.

    Parameters:
        cur_note (NoteHead): The notehead to search around
        group_map (ndarray): Current group mapping array
        scan_range_ratio (float): Search radius as multiple of unit size

    Returns:
        List[int]: List of nearby group IDs
    """
def check_valid_new_group(ori_grp: List[int], tar_grp: List[int], group_map: ndarray, max_x_diff_ratio: float = 0.5) -> bool:
    """
    Check if two groups can be validly merged.

    Parameters:
        ori_grp (List[int]): Original group note IDs
        tar_grp (List[int]): Target group note IDs
        group_map (ndarray): Group mapping array
        max_x_diff_ratio (float): Maximum horizontal separation ratio

    Returns:
        bool: True if groups can be merged, False otherwise
    """

Extract rhythm information from beams, flags, and augmentation dots.
def extract(min_area_ratio: float = 0.08, max_area_ratio: float = 0.2, beam_th: float = 0.5) -> None:
    """
    Extract rhythm information from beams, flags, and dots.

    Analyzes beam structures, note flags, and augmentation dots to
    determine the final rhythm values for each note and note group.

    Parameters:
        min_area_ratio (float): Minimum area ratio for valid rhythm elements
        max_area_ratio (float): Maximum area ratio for valid rhythm elements
        beam_th (float): Threshold for beam detection

    Raises:
        KeyError: If required layers are not available
    """
def get_rhythm_class(region: ndarray, model_name: str = "rhythm") -> str:
    """
    Classify rhythm type from image region using trained models.

    Parameters:
        region (ndarray): Image region containing rhythm elements
        model_name (str): Name of sklearn model for rhythm classification

    Returns:
        str: Predicted rhythm class (beam, flag, etc.)
    """
def check_beam_connection(note1: NoteHead, note2: NoteHead, beam_predictions: ndarray) -> bool:
    """
    Check if two noteheads are connected by a beam.

    Parameters:
        note1 (NoteHead): First notehead
        note2 (NoteHead): Second notehead
        beam_predictions (ndarray): Beam detection predictions

    Returns:
        bool: True if noteheads are beam-connected
    """

Represents a group of noteheads that form a musical chord or beamed group.
class NoteGroup:
    """
    Represents a group of notes connected by stems, beams, or forming chords.

    Attributes:
        id (Optional[int]): Unique identifier for this group
        bbox (BBox): Bounding box encompassing all notes in the group
        note_ids (List[int]): IDs of noteheads belonging to this group
        top_note_ids (List[int]): IDs of the highest notes (for multi-voice)
        bottom_note_ids (List[int]): IDs of the lowest notes (for multi-voice)
        stem_up (Optional[bool]): Direction of stem (True=up, False=down)
        has_stem (Optional[bool]): Whether this group has a visible stem
        all_same_type (Optional[bool]): Whether all notes have same rhythm type
        group (Optional[int]): Staff group number
        track (Optional[int]): Track number for multi-staff systems
    """

    @property
    def x_center(self) -> float:
        """
        Get the horizontal center of this note group.

        Returns:
            float: X-coordinate of the group center
        """

    def __len__(self) -> int:
        """Get the number of notes in this group."""

    def __repr__(self) -> str:
        """String representation of the note group."""

The grouping algorithm identifies noteheads that share common stems:
- For beamed note groups (eighth notes and shorter): noteheads connected in sequence along a shared beam are grouped together.
- For simultaneous notes (chords): noteheads sharing a single stem are grouped as one chord.
# Example: basic note-group extraction and per-group inspection.
from oemer.note_group_extraction import extract
from oemer.layers import get_layer
import numpy as np

# Ensure required layers are available
try:
    notes = get_layer('notes')
    stems_rests = get_layer('stems_rests_pred')

    # Extract note groups
    note_groups, group_map = extract()
    print(f"Found {len(note_groups)} note groups")

    # Analyze each group
    for i, group in enumerate(note_groups):
        print(f"\nGroup {i}:")
        print(f" Notes: {len(group)} noteheads")
        print(f" Stem up: {group.stem_up}")
        print(f" Has stem: {group.has_stem}")
        print(f" Track: {group.track}")
        print(f" Center: ({group.x_center:.1f})")
        print(f" Same type: {group.all_same_type}")

        # List individual notes in group.
        # Reuse the `notes` layer fetched above instead of calling
        # get_layer('notes') again on every loop iteration (the original
        # example re-fetched the same layer per group).
        for note_id in group.note_ids:
            note = notes[note_id]
            print(f" Note {note_id}: {note.label}, pitch={note.pitch}")
except KeyError as e:
    print(f"Required layer missing: {e}")

from oemer.note_group_extraction import extract
# Example: detect chords (vertically aligned multi-note groups).
from oemer.layers import get_layer
import numpy as np  # needed for np.var below; missing in the original example

# Extract groups and analyze chords
note_groups, group_map = extract()
notes = get_layer('notes')

# Find chord groups (multiple notes at same time position)
chords = []
single_notes = []
for group in note_groups:
    if len(group) > 1:
        # This is potentially a chord
        chord_notes = [notes[nid] for nid in group.note_ids]

        # Check if notes are vertically aligned (true chord)
        x_positions = [note.bbox[0] for note in chord_notes]
        x_variance = np.var(x_positions)
        if x_variance < 100:  # Low variance indicates vertical alignment
            chords.append(group)
            print(f"Chord found: {len(chord_notes)} notes")

            # Show chord notes from bottom to top
            chord_notes.sort(key=lambda n: n.staff_line_pos)
            for note in chord_notes:
                print(f" {note.label.name} at position {note.staff_line_pos}")
        else:
            single_notes.append(group)
    else:
        single_notes.append(group)

print(f"\nFound {len(chords)} chords and {len(single_notes)} single note groups")

from oemer.note_group_extraction import extract, check_beam_connection
# Example: find beamed note groups using beam-connection checks.
from oemer.rhythm_extraction import extract as rhythm_extract
from oemer.layers import get_layer

# Extract groups and analyze beamed notes
note_groups, group_map = extract()
notes = get_layer('notes')

# Run rhythm analysis to detect beams
rhythm_extract()

# Fetch beam predictions once: the layer is loop-invariant, so fetching it
# inside the per-group loop (as the original example did) repeats identical work.
beam_pred = get_layer('stems_rests_pred')  # Contains beam info

# Find beamed groups
beamed_groups = []
for group in note_groups:
    if len(group) > 1 and group.has_stem:
        # Check if this is a beamed group
        group_notes = [notes[nid] for nid in group.note_ids]

        # Sort by horizontal position
        group_notes.sort(key=lambda n: n.bbox[0])

        # Check for beam connections between consecutive notes
        is_beamed = any(
            check_beam_connection(left, right, beam_pred)
            for left, right in zip(group_notes, group_notes[1:])
        )

        if is_beamed:
            beamed_groups.append(group)
            print(f"Beamed group: {len(group_notes)} notes")

            # Show note sequence
            for note in group_notes:
                print(f" {note.label.name} at x={note.bbox[0]}")

print(f"\nFound {len(beamed_groups)} beamed note groups")

from oemer.note_group_extraction import extract
from oemer.layers import get_layer
from collections import defaultdict
# Extract groups and separate voices
note_groups, group_map = extract()
notes = get_layer('notes')
# Group by track and analyze stem directions
by_track = defaultdict(list)
for group in note_groups:
by_track[group.track].append(group)
for track, groups in by_track.items():
print(f"\nTrack {track}:")
# Separate by stem direction (voices)
stem_up_groups = [g for g in groups if g.stem_up == True]
stem_down_groups = [g for g in groups if g.stem_up == False]
no_stem_groups = [g for g in groups if g.stem_up is None]
print(f" Stem up (voice 1): {len(stem_up_groups)} groups")
print(f" Stem down (voice 2): {len(stem_down_groups)} groups")
print(f" No stem: {len(no_stem_groups)} groups")
# Analyze voice crossing
if stem_up_groups and stem_down_groups:
up_positions = []
down_positions = []
for group in stem_up_groups:
for note_id in group.note_ids:
up_positions.append(notes[note_id].staff_line_pos)
for group in stem_down_groups:
for note_id in group.note_ids:
down_positions.append(notes[note_id].staff_line_pos)
if up_positions and down_positions:
avg_up = np.mean(up_positions)
avg_down = np.mean(down_positions)
if avg_up < avg_down: # Voice crossing detected
print(f" Voice crossing detected: up voice avg={avg_up:.1f}, down voice avg={avg_down:.1f}")The note grouping module integrates with other pipeline components:
Input Dependencies:
- `notes` layer: Individual noteheads from notehead extraction
- `stems_rests_pred` layer: Stem and rest predictions from the neural network
- `staffs` layer: Staff line information for context

Output Products:
- `note_groups` layer: Array of NoteGroup instances
- `group_map` layer: Pixel-level mapping of group assignments

Downstream Usage:
This modular approach allows the grouping algorithm to be refined independently while maintaining integration with the broader OMR pipeline.
Install with Tessl CLI
npx tessl i tessl/pypi-oemer