tessl/pypi-panoramix

An interactive data visualization platform built on SQLAlchemy and Druid.io

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Druid Data Sources

Name: tessl/pypi-panoramix
Author: tessl

Panoramix integrates with Apache Druid for real-time analytics and OLAP querying. Druid datasources provide high-performance analytics on streaming and batch data with pre-aggregated metrics and fast drill-down capabilities.

Capabilities

Druid Datasource Management

Manage Druid datasources with automatic metadata synchronization, dimension and metric discovery, and query optimization.

class Datasource(Model, AuditMixin, Queryable):
    """
    Model of a Druid datasource used for real-time analytics.

    Attributes:
        id (int): Primary key
        datasource_name (str): Unique identifier of the datasource
        is_featured (bool): True when the datasource appears in the featured list
        is_hidden (bool): True when the datasource is hidden from the UI
        description (str): Description of the datasource
        default_endpoint (str): Default visualization endpoint
        user_id (int): Foreign key to User
        owner (User): Reference to the owning User
        cluster_name (str): Name of the Druid cluster
        cluster (Cluster): Reference to the Druid cluster
    """

    def query(self, groupby, metrics, granularity, from_dttm, to_dttm,
             limit_spec=None, filter=None, is_timeseries=True,
             timeseries_limit=15, row_limit=None):
        """
        Run a Druid query with the requested aggregations and filters.

        Args:
            groupby (list): Dimensions to group by
            metrics (list): Metrics to calculate
            granularity (str): Time granularity ('second', 'minute', 'hour', 'day', 'week', 'month')
            from_dttm (datetime): Start of the queried time range
            to_dttm (datetime): End of the queried time range
            limit_spec (dict, optional): Limit specification
            filter (list, optional): Filter conditions
            is_timeseries (bool): Whether the query is time-based (default True)
            timeseries_limit (int): Limit for timeseries results (default 15)
            row_limit (int, optional): Maximum number of rows to return

        Returns:
            QueryResult: Named tuple with df, query, and duration
        """

    def get_metric_obj(self, metric_name):
        """
        Look up a metric configuration object by its name.

        Args:
            metric_name (str): Name of the metric to retrieve

        Returns:
            Metric: Metric configuration object
        """

    @classmethod
    def sync_to_db(cls, name, cluster):
        """
        Synchronize a datasource's metadata from a Druid cluster.

        Args:
            name (str): Datasource name in Druid
            cluster (Cluster): Druid cluster instance

        Returns:
            Datasource: Created or updated datasource instance
        """

    def latest_metadata(self):
        """
        Fetch the latest metadata from the Druid cluster.

        Returns:
            dict: Column metadata from segment information
        """

    def generate_metrics(self):
        """Generate the default metrics for all columns."""

    @property
    def name(self):
        """Datasource name (same value as ``datasource_name``)."""
        return self.datasource_name

    @property
    def datasource_link(self):
        """HTML anchor tag pointing at the datasource view."""
        href = "/panoramix/datasource/{}/".format(self.datasource_name)
        # Explicit keyword fields instead of **locals(): same output, but the
        # values feeding the template are visible at the call site.
        return '<a href="{url}">{name}</a>'.format(
            url=href, name=self.datasource_name)

    @property
    def metrics_combo(self):
        """(metric_name, verbose_name) tuples for forms, sorted by verbose name."""
        choices = (
            (metric.metric_name, metric.verbose_name)
            for metric in self.metrics
        )
        return sorted(choices, key=lambda choice: choice[1])

    def __repr__(self):
        """Represent the datasource by its name."""
        return self.datasource_name

Druid Dimensions

Manage Druid dimensions (groupable columns) with data types and filtering capabilities.

class Column(Model, AuditMixin):
    """
    Metadata for one dimension of a Druid datasource.

    Attributes:
        id (int): Primary key
        column_name (str): Name of the dimension in Druid
        verbose_name (str): Human-readable dimension name
        is_active (bool): True when the dimension is active for queries
        type (str): Dimension data type ('STRING', 'LONG', 'FLOAT', etc.)
        groupby (bool): True when the dimension can be used for grouping
        filterable (bool): True when the dimension can be filtered
        description (str): Description of the dimension
        datasource_id (int): Foreign key to Datasource
        datasource (Datasource): Reference to the parent datasource
        is_dttm (bool): True when the dimension contains datetime data
        expression (str): Custom expression for computed dimensions
    """

    @property
    def isnum(self):
        """True when ``type`` is one of 'LONG', 'DOUBLE' or 'FLOAT'."""
        numeric_types = ('LONG', 'DOUBLE', 'FLOAT')
        return self.type in numeric_types

    def generate_metrics(self):
        """Generate the default metrics for this dimension."""

    def __repr__(self):
        """Represent the column by its name."""
        return self.column_name

Druid Metrics

Define and manage Druid metrics including aggregations, post-aggregations, and custom expressions.

class Metric(Model, AuditMixin):
    """
    Druid-based metric definition for datasources.

    Each metric belongs to one datasource (``datasource_id`` / ``datasource``).

    Attributes:
        id (int): Primary key
        metric_name (str): Unique metric identifier
        verbose_name (str): Human-readable metric name
        metric_type (str): Type of metric ('longSum', 'doubleSum', 'count', etc.)
        json (str): JSON configuration for complex metrics, stored as text
        description (str): Metric description
        is_restricted (bool): Whether metric has access restrictions
        datasource_id (int): Foreign key to Datasource
        datasource (Datasource): Reference to parent datasource
    """
    
    @property
    def json_obj(self):
        """
        Get parsed JSON configuration for the metric.

        NOTE(review): only the signature is listed here; presumably this
        parses the ``json`` attribute. Behavior on invalid or empty JSON is
        not shown; confirm against the implementation.

        Returns:
            dict: Parsed JSON configuration object
        """

Usage Examples

Basic Druid Querying

from datetime import datetime, timedelta

from panoramix.models import Cluster, Datasource

# Get Druid cluster and datasource
# NOTE(review): Datasource defines an instance method named query(), which
# would shadow a class-level `Datasource.query` attribute; confirm this
# lookup works, or go through a session-level query instead.
cluster = Cluster.query.filter_by(cluster_name='production').first()
datasource = Datasource.query.filter_by(
    datasource_name='events',
    cluster=cluster
).first()

# Time series query over the last 24 hours.
# query() takes explicit datetime bounds (from_dttm / to_dttm); it has no
# since/until parameters, so the window is computed here.
to_dttm = datetime.now()
from_dttm = to_dttm - timedelta(hours=24)
result = datasource.query(
    groupby=['country'],
    metrics=['count', 'sum__revenue'],
    granularity='hour',
    from_dttm=from_dttm,
    to_dttm=to_dttm,
)

# result is a QueryResult named tuple (df, query, duration)
print(result.df)

Real-time Analytics

from datetime import datetime, timedelta

# High-frequency real-time query over the last hour.
# query() takes datetime bounds (from_dttm / to_dttm) and a `filter` list;
# it has no since/until/where parameters.
to_dttm = datetime.now()
result = datasource.query(
    groupby=['event_type', 'platform'],
    metrics=['count', 'unique__user_id'],
    granularity='minute',
    from_dttm=to_dttm - timedelta(hours=1),
    to_dttm=to_dttm,
    # NOTE(review): `filter` is documented only as a "list of filter
    # conditions"; the (column, operator, value) tuple shape used here is an
    # assumption. Confirm against the implementation.
    filter=[('country', '==', 'US')],
    row_limit=100
)

# Access real-time event data
events_df = result.df
print(f"Query executed in {result.duration} seconds")

Custom Metrics and Post-Aggregations

from datetime import datetime, timedelta

# Query with custom metrics.
# query() requires granularity and a datetime range, and has no
# having/limit_metric/order_desc parameters, so the impression threshold and
# the ordering are applied to the returned frame instead.
to_dttm = datetime.now()
result = datasource.query(
    groupby=['campaign_id'],
    metrics=['sum__impressions', 'sum__clicks', 'click_through_rate'],
    granularity='day',
    from_dttm=to_dttm - timedelta(days=7),
    to_dttm=to_dttm,
)

# NOTE(review): assumes result.df is a pandas DataFrame with one column per
# requested metric. Confirm before relying on this post-processing.
df = result.df
top_campaigns = (
    df[df['sum__impressions'] > 1000]
    .sort_values('click_through_rate', ascending=False)
    .head(10)
)

Datasource Synchronization

# Pull one datasource's metadata from Druid into the database
synced_datasource = Datasource.sync_to_db('new_events', cluster)

# Refresh every datasource in the cluster
cluster.refresh_datasources()

# Look up a metric's configuration by name
conversion_metric = datasource.get_metric_obj('conversion_rate')
print(conversion_metric.json)  # JSON definition of the metric

Properties and Helpers

class Datasource:
    # Summary listing of Datasource's read-only helper properties; bodies are
    # omitted here, so only the names and one-line contracts are shown.
    @property
    def datasource_link(self):
        """HTML link (an ``<a>`` tag) to the datasource visualization view."""
    
    @property
    def metrics_combo(self):
        """List of (metric_name, verbose_name) tuples usable as form choices."""
    
    @property
    def column_names(self):
        """List of all dimension names."""
        
    @property
    def groupby_column_names(self):
        """List of dimensions available for grouping."""
        
    @property  
    def filterable_column_names(self):
        """List of dimensions available for filtering."""