tessl/pypi-objgraph

Draws Python object reference graphs with Graphviz

—

Pending

Overview

Eval results

Files

Object Discovery and Access

Name: tessl/pypi-objgraph
Author: tessl

Functions for finding specific objects by type or memory address, and identifying objects that may indicate memory management issues. These tools help locate objects for further analysis and investigation.

Capabilities

Type-Based Object Discovery

Find all objects of a specific type tracked by the garbage collector.

def by_type(typename, objects=None):
    """
    Return objects tracked by the garbage collector with a given class name.

    Args:
        typename (str): Class name to match, either short ('MyClass') or
            fully qualified ('mymodule.MyClass').
        objects (list, optional): Custom object collection to search instead
            of gc.get_objects(). Defaults to None, in which case the objects
            reported by gc.get_objects() are searched.

    Returns:
        list: List of objects matching the type name.

    Note:
        The garbage collector does not track simple objects like int or str,
        so those are not visible through gc.get_objects().
    """

Usage examples:

import objgraph

# Every list currently tracked by the garbage collector
all_lists = objgraph.by_type('list')
print(f"Found {len(all_lists)} list objects")

# Instances of a user-defined class, matched by its short name
my_objects = objgraph.by_type('MyClass')
for obj in my_objects:
    print(f"MyClass instance: {obj}")

# A fully qualified name avoids matching same-named classes from other modules
specific_objects = objgraph.by_type('mymodule.MyClass')

# Restrict the search to an explicit collection instead of all tracked objects
leaking_objects = objgraph.get_leaking_objects()
leaking_dicts = objgraph.by_type('dict', leaking_objects)

# Post-filter the result like any other Python list
large_lists = []
for lst in objgraph.by_type('list'):
    if len(lst) > 1000:
        large_lists.append(lst)
print(f"Found {len(large_lists)} large lists")

Address-Based Object Access

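Retrieve an object from its memory address (the value returned by id()), which is useful for tracking a specific object instance. An address identifies an object only while that object is alive; once the object is freed, the same address may be reused by a different object.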

def at(addr):
    """
    Return an object at a given memory address.

    Args:
        addr (int): Memory address of the object, as returned by id(obj).

    Returns:
        object or None: Object at the address, or None if not found.

    Note:
        Only works on objects tracked by the garbage collector.
        This is the reverse of id(obj): for such an object,
        ``at(id(obj)) is obj`` should be True.
    """

Usage examples:

import objgraph

# Basic address lookup
my_list = [1, 2, 3]
address = id(my_list)
found_obj = objgraph.at(address)
print(f"Found same object: {found_obj is my_list}")

# Use with object tracking
# Keep the objects alive while their addresses are in use: an id() is only
# meaningful during the object's lifetime, and the address may be reused by an
# unrelated object afterwards. Each dict also holds a list so that the garbage
# collector tracks it; a dict containing only atomic values (such as
# {'index': 0}) may be untracked and therefore invisible to at().
tracked_objects = [{'index': i, 'refs': []} for i in range(10)]
objects_to_track = [id(obj) for obj in tracked_objects]

# Later retrieve the objects
for addr in objects_to_track:
    obj = objgraph.at(addr)
    if obj is not None:
        print(f"Object at {addr}: {obj}")
    else:
        # None means no gc-tracked object lives at this address (any more).
        print(f"No tracked object found at {addr}")

Bulk Address Resolution

Retrieve multiple objects from a set of memory addresses efficiently.

def at_addrs(address_set):
    """
    Return a list of objects for a given set of memory addresses.

    Args:
        address_set (set): Set of memory addresses, as returned by id() calls.

    Returns:
        list: List of objects found at those addresses, in arbitrary order.

    Note:
        Only works on objects tracked by the garbage collector.
        NOTE(review): presumably addresses with no matching tracked object
        are simply left out of the result -- confirm against the library.
    """

Usage examples:

import objgraph

# Use with get_new_ids for object tracking
objgraph.get_new_ids(limit=0)  # Establish baseline

# Create some objects
a = [1, 2, 3]
b = [4, 5, 6]
c = {'key': 'value'}

# Get IDs of newly created objects (a mapping from type name to a set of ids)
new_ids = objgraph.get_new_ids(limit=0)

# Retrieve the actual list objects
new_lists = objgraph.at_addrs(new_ids.get('list', set()))
print(f"Created {len(new_lists)} new lists")
# Compare by identity: `a in new_lists` would also be True for any other list
# that merely compares equal to [1, 2, 3].
print(f"Our list 'a' is in new lists: {any(obj is a for obj in new_lists)}")
print(f"Our list 'b' is in new lists: {any(obj is b for obj in new_lists)}")

# Combine with filtering
large_new_lists = [lst for lst in new_lists if len(lst) > 5]

# Save addresses for later analysis
some_important_objects = [a, b]  # any objects you want to check on later
important_object_ids = {id(obj) for obj in some_important_objects}
# ... later ...
still_alive = objgraph.at_addrs(important_object_ids)
print(f"{len(still_alive)} of {len(important_object_ids)} objects still exist")

Leak Detection

Identify objects that have no referrers, which may indicate reference-counting bugs or orphaned objects.

def get_leaking_objects(objects=None):
    """
    Return objects that do not have any referrers.

    Args:
        objects (list, optional): Custom object collection to analyze instead
            of gc.get_objects().

    Returns:
        list: List of objects without referrers.

    Note:
        Such objects could indicate reference-counting bugs in C code, or
        they could be legitimate.
        The garbage collector does not track simple objects like int or str.
        Calls gc.collect() automatically.
        NOTE(review): confirm whether gc.collect() also runs, and whether
        referrers outside the collection are considered, when a custom
        ``objects`` collection is passed.
    """

Usage examples:

import objgraph

# Find potentially leaking objects
leaking = objgraph.get_leaking_objects()
print(f"Found {len(leaking)} objects without referrers")

# Analyze leaking objects by type
if leaking:
    leak_stats = objgraph.typestats(leaking)
    print("Leaking object types:")
    for obj_type, count in sorted(leak_stats.items(), key=lambda x: x[1], reverse=True):
        print(f"  {obj_type}: {count}")

# Investigate specific leaking objects
for obj in leaking[:5]:  # Look at first 5 (an empty list simply skips the loop)
    print(f"Leaking {type(obj).__name__}: {repr(obj)[:100]}")

    # These objects were selected because nothing refers to them, so a
    # back-reference graph has little to show. Graph what each object itself
    # refers to instead.
    objgraph.show_refs([obj], max_depth=2)

# Filter leaking objects
large_leaking = [obj for obj in leaking
                 if hasattr(obj, '__len__') and len(obj) > 100]

# Narrow the leaking objects down to a single class by passing them to
# by_type as the collection to search
leaking_my_objects = objgraph.by_type('MyClass', leaking)
if leaking_my_objects:
    print(f"Found {len(leaking_my_objects)} leaking MyClass instances")

Common Workflows

Object Lifecycle Tracking

import objgraph
import weakref

class TrackedObject:
    """Minimal named object used to demonstrate lifecycle tracking."""

    def __init__(self, name):
        # Label shown by repr() so individual instances can be told apart.
        self.name = name

    def __repr__(self):
        return "TrackedObject({})".format(self.name)

# Create some objects to track
objects = [TrackedObject(f"obj_{i}") for i in range(5)]
object_ids = {id(obj) for obj in objects}

print(f"Created {len(objects)} objects")

# Clear some references
del objects[2:4]  # Remove references to obj_2 and obj_3

# Check which objects still exist
# Once an object is freed its address can be reused by an unrelated object,
# so only count results that are still TrackedObject instances.
surviving_objects = [obj for obj in objgraph.at_addrs(object_ids)
                     if isinstance(obj, TrackedObject)]
print(f"{len(surviving_objects)} objects survived")

# Force garbage collection and check again
import gc
gc.collect()
surviving_after_gc = [obj for obj in objgraph.at_addrs(object_ids)
                      if isinstance(obj, TrackedObject)]
print(f"{len(surviving_after_gc)} objects survived after GC")

Memory Leak Investigation

import objgraph

# Collect the objects that currently have no referrers
leaking = objgraph.get_leaking_objects()
if leaking:
    # Bucket the candidates by their type name
    leak_by_type = {}
    for candidate in leaking:
        type_name = type(candidate).__name__
        if type_name not in leak_by_type:
            leak_by_type[type_name] = []
        leak_by_type[type_name].append(candidate)

    # Pick the type with the most instances (the first one wins on ties)
    most_common_type, problem_objects = max(
        leak_by_type.items(), key=lambda entry: len(entry[1])
    )

    print(f"Most leaking type: {most_common_type} ({len(problem_objects)} instances)")

    # Look at a small sample of that type
    for sample in problem_objects[:3]:
        print(f"Investigating: {repr(sample)[:100]}")
        # Nothing refers to these objects, so graph what they refer to instead
        objgraph.show_refs([sample], max_depth=2)

Install with Tessl CLI