CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/golang-github-com-go-co-op-gocron-v2

A Golang job scheduling library that lets you run Go functions at pre-determined intervals using cron expressions, fixed durations, daily, weekly, monthly, or one-time schedules with support for distributed deployments.

Overview
Eval results
Files

docs/guides/observability/metrics.md

Metrics

Guide to collecting and exporting job execution metrics.

Overview

Gocron provides hooks for collecting metrics about job execution through the Monitor interface.

Monitor Interface

// Monitor is gocron's metrics hook: the scheduler calls RecordJobTiming
// after every job execution with the run's timing information.
type Monitor interface {
    RecordJobTiming(
        start time.Time,        // when the run began
        duration time.Duration, // how long the run took
        jobID uuid.UUID,        // unique ID of the job
        jobName string,         // human-readable job name
        tags []string,          // tags attached to the job
    )
}

Called after each job execution with timing information.

Basic Metrics Implementation

// metricsMonitor is a minimal in-memory Monitor implementation that
// counts executions and collects durations per job name.
type metricsMonitor struct {
    executions map[string]int             // job name -> total run count
    durations  map[string][]time.Duration // job name -> per-run durations
    mu         sync.Mutex                 // guards executions and durations
}

// RecordJobTiming implements the Monitor interface: it tallies one more
// execution for the job, stores the run's duration, and logs a summary.
func (m *metricsMonitor) RecordJobTiming(
    start time.Time,
    duration time.Duration,
    jobID uuid.UUID,
    jobName string,
    tags []string,
) {
    m.mu.Lock()
    defer m.mu.Unlock()

    total := m.executions[jobName] + 1
    m.executions[jobName] = total
    m.durations[jobName] = append(m.durations[jobName], duration)

    log.Printf("Metrics: %s executed in %v (total: %d)",
        jobName, duration, total)
}

// Register the monitor when constructing the scheduler. Do not discard
// the error: a silently nil scheduler would panic on first use.
s, err := gocron.NewScheduler(
    gocron.WithMonitor(&metricsMonitor{
        executions: make(map[string]int),
        durations:  make(map[string][]time.Duration),
    }),
)
if err != nil {
    log.Fatalf("creating scheduler: %v", err)
}

Prometheus Integration

Setup

import (
    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promhttp"
)

// Prometheus collectors for gocron jobs, all labeled by job name only.
// Keeping the label set low-cardinality avoids time-series explosion.
var (
    // jobExecutions counts every job run.
    jobExecutions = prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Name: "gocron_job_executions_total",
            Help: "Total number of job executions",
        },
        []string{"job_name"},
    )

    // jobDuration tracks the run-time distribution using default buckets.
    jobDuration = prometheus.NewHistogramVec(
        prometheus.HistogramOpts{
            Name: "gocron_job_duration_seconds",
            Help: "Job execution duration in seconds",
            Buckets: prometheus.DefBuckets,
        },
        []string{"job_name"},
    )

    // jobErrors counts runs that finished with a non-nil error.
    jobErrors = prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Name: "gocron_job_errors_total",
            Help: "Total number of job errors",
        },
        []string{"job_name"},
    )
)

// init registers the collectors with the default Prometheus registry;
// MustRegister panics on duplicate registration, which surfaces wiring
// mistakes at startup.
func init() {
    // MustRegister is variadic, so one call registers all collectors.
    prometheus.MustRegister(jobExecutions, jobDuration, jobErrors)
}

Monitor Implementation

// prometheusMonitor is a stateless Monitor that forwards job timings to
// the package-level Prometheus collectors.
type prometheusMonitor struct{}

// RecordJobTiming bumps the execution counter and observes the run
// duration (in seconds) for the given job.
func (m *prometheusMonitor) RecordJobTiming(
    start time.Time,
    duration time.Duration,
    jobID uuid.UUID,
    jobName string,
    tags []string,
) {
    labels := prometheus.Labels{"job_name": jobName}
    jobExecutions.With(labels).Inc()
    jobDuration.With(labels).Observe(duration.Seconds())
}

// JobCompleted counts runs that ended with a non-nil error.
// NOTE(review): confirm the exact completion-hook interface name against
// the gocron v2 API before relying on this example.
func (m *prometheusMonitor) JobCompleted(
    jobID uuid.UUID,
    job gocron.Job,
    err error,
) {
    if err == nil {
        return
    }
    jobErrors.WithLabelValues(job.Name()).Inc()
}

s, _ := gocron.NewScheduler(
    gocron.WithMonitor(&prometheusMonitor{}),
    gocron.WithSchedulerMonitor(&prometheusMonitor{}),
)

Expose Metrics Endpoint

// Expose the Prometheus scrape endpoint. Log a server failure instead of
// discarding it inside the bare goroutine.
http.Handle("/metrics", promhttp.Handler())
go func() {
    if err := http.ListenAndServe(":2112", nil); err != nil {
        log.Printf("metrics server stopped: %v", err)
    }
}()

Example Queries

# Total executions per job
sum by (job_name) (gocron_job_executions_total)

# Average duration per job
rate(gocron_job_duration_seconds_sum[5m]) /
rate(gocron_job_duration_seconds_count[5m])

# Error rate
rate(gocron_job_errors_total[5m])

# 99th percentile duration
histogram_quantile(0.99,
  rate(gocron_job_duration_seconds_bucket[5m])
)

StatsD Integration

import "github.com/DataDog/datadog-go/statsd"

// statsdMonitor forwards job metrics to a DogStatsD agent.
type statsdMonitor struct {
    client *statsd.Client // shared DogStatsD client, safe to reuse across calls
}

// newStatsdMonitor dials a DogStatsD client at addr and wraps it in a
// statsdMonitor. It returns an error when the client cannot be created.
func newStatsdMonitor(addr string) (*statsdMonitor, error) {
    c, err := statsd.New(addr)
    if err != nil {
        return nil, err
    }
    m := &statsdMonitor{client: c}
    return m, nil
}

// RecordJobTiming emits a timing and an execution count for the job.
//
// The incoming tags slice is owned by the scheduler; appending to it in
// place can write into the caller's backing array when it has spare
// capacity, so the tags are copied before the job tag is added.
func (m *statsdMonitor) RecordJobTiming(
    start time.Time,
    duration time.Duration,
    jobID uuid.UUID,
    jobName string,
    tags []string,
) {
    allTags := make([]string, 0, len(tags)+1)
    allTags = append(allTags, tags...)
    allTags = append(allTags, "job:"+jobName)

    // Best-effort delivery: log send failures instead of dropping them silently.
    if err := m.client.Timing("gocron.job.duration", duration, allTags, 1.0); err != nil {
        log.Printf("statsd timing: %v", err)
    }
    if err := m.client.Incr("gocron.job.executions", allTags, 1.0); err != nil {
        log.Printf("statsd incr: %v", err)
    }
}

// JobCompleted increments the error counter when the job run failed.
func (m *statsdMonitor) JobCompleted(
    jobID uuid.UUID,
    job gocron.Job,
    err error,
) {
    if err == nil {
        return
    }
    m.client.Incr("gocron.job.errors", []string{"job:" + job.Name()}, 1.0)
}

OpenTelemetry Integration

import (
    "go.opentelemetry.io/otel"
    "go.opentelemetry.io/otel/attribute"
    "go.opentelemetry.io/otel/metric"
)

// otelMonitor publishes job metrics through an OpenTelemetry meter.
type otelMonitor struct {
    meter    metric.Meter // source meter, retained for creating further instruments
    duration metric.Float64Histogram // job run duration in seconds
    counter  metric.Int64Counter // total job executions
}

// newOtelMonitor creates the duration histogram and execution counter on
// the global "gocron" meter, failing if either instrument cannot be built.
func newOtelMonitor() (*otelMonitor, error) {
    meter := otel.Meter("gocron")

    hist, err := meter.Float64Histogram(
        "gocron.job.duration",
        metric.WithDescription("Job execution duration"),
        metric.WithUnit("s"),
    )
    if err != nil {
        return nil, err
    }

    execs, err := meter.Int64Counter(
        "gocron.job.executions",
        metric.WithDescription("Job execution count"),
    )
    if err != nil {
        return nil, err
    }

    m := &otelMonitor{
        meter:    meter,
        duration: hist,
        counter:  execs,
    }
    return m, nil
}

// RecordJobTiming records the run duration and bumps the execution
// counter, attaching the job name as an attribute to both measurements.
func (m *otelMonitor) RecordJobTiming(
    start time.Time,
    duration time.Duration,
    jobID uuid.UUID,
    jobName string,
    tags []string,
) {
    ctx := context.Background()
    opt := metric.WithAttributes(attribute.String("job.name", jobName))

    m.duration.Record(ctx, duration.Seconds(), opt)
    m.counter.Add(ctx, 1, opt)
}

Custom Metrics

Execution Rate

// rateMonitor watches per-job execution frequency with token-bucket
// limiters (presumably golang.org/x/time/rate — confirm the import).
type rateMonitor struct {
    rates map[string]*rate.Limiter // job name -> frequency limiter
    mu    sync.Mutex               // guards rates
}

// RecordJobTiming warns when a job executes more often than its limiter
// allows (sustained one token per minute with a burst of 60).
func (m *rateMonitor) RecordJobTiming(
    start time.Time,
    duration time.Duration,
    jobID uuid.UUID,
    jobName string,
    tags []string,
) {
    m.mu.Lock()
    defer m.mu.Unlock()

    // Single map lookup instead of a check-then-index pair.
    limiter, ok := m.rates[jobName]
    if !ok {
        limiter = rate.NewLimiter(rate.Every(time.Minute), 60)
        m.rates[jobName] = limiter
    }

    // Check if execution rate is too high
    if !limiter.Allow() {
        log.Printf("WARNING: %s executing too frequently", jobName)
    }
}

Success Rate

// successMonitor tracks per-job success and failure counts and
// periodically logs a success-rate summary.
type successMonitor struct {
    successes map[string]int // job name -> runs that completed without error
    failures  map[string]int // job name -> runs that returned an error
    mu        sync.Mutex     // guards successes and failures
}

// JobCompleted records the run outcome for the job and, every 100th
// completion, logs the job's cumulative success rate.
func (m *successMonitor) JobCompleted(
    jobID uuid.UUID,
    job gocron.Job,
    err error,
) {
    name := job.Name()

    m.mu.Lock()
    defer m.mu.Unlock()

    if err == nil {
        m.successes[name]++
    } else {
        m.failures[name]++
    }

    // Log success rate every 100 executions
    total := m.successes[name] + m.failures[name]
    if total%100 == 0 {
        pct := float64(m.successes[name]) / float64(total) * 100
        log.Printf("%s success rate: %.2f%%", name, pct)
    }
}

Metric Types

Counters

Track total occurrences:

jobExecutions.WithLabelValues(jobName).Inc()

Use cases:

  • Total executions
  • Error count
  • Completion count

Histograms

Track value distributions:

jobDuration.WithLabelValues(jobName).Observe(duration.Seconds())

Use cases:

  • Execution duration
  • Queue depth
  • Latency distribution

Gauges

Track current values:

activeJobs.WithLabelValues().Set(float64(len(s.Jobs())))

Use cases:

  • Active jobs
  • Queue size
  • Concurrent executions

Best Practices

1. Use Tags/Labels Wisely

// Good: low cardinality
labels := []string{"job_name", "status"}

// Bad: high cardinality (causes explosion)
labels := []string{"job_id", "timestamp"}

2. Aggregate Metrics

// Good: aggregate before sending
if count%100 == 0 {
    sendMetric("total_executions", count)
}

// Bad: send every execution
sendMetric("execution", 1)

3. Handle Errors Gracefully

// Illustrative pseudocode: metric delivery failures are logged, never
// escalated — a monitoring hiccup must not disrupt job execution.
func (m *myMonitor) RecordJobTiming(...) {
    if err := m.send(...); err != nil {
        // Log but don't panic
        log.Printf("Failed to send metric: %v", err)
    }
}

4. Batch Metrics

// batchMonitor buffers metrics and ships them in batches.
// NOTE(review): the `metric` type is assumed to be defined elsewhere.
type batchMonitor struct {
    buffer []metric   // pending, not-yet-flushed metrics
    mu     sync.Mutex // guards buffer
}

// RecordJobTiming appends a metric to the buffer and flushes once 100
// entries have accumulated.
//
// The length check must happen while the mutex is still held: in the
// original, reading m.buffer after Unlock raced with concurrent appends.
// (Assumes m.flush does not itself take the lock — TODO confirm.)
func (m *batchMonitor) RecordJobTiming(...) {
    m.mu.Lock()
    defer m.mu.Unlock()

    m.buffer = append(m.buffer, metric{...})
    if len(m.buffer) >= 100 {
        m.flush()
    }
}

Dashboard Examples

Grafana Dashboard

{
  "dashboard": {
    "title": "Gocron Metrics",
    "panels": [
      {
        "title": "Execution Rate",
        "targets": [{
          "expr": "rate(gocron_job_executions_total[5m])"
        }]
      },
      {
        "title": "Error Rate",
        "targets": [{
          "expr": "rate(gocron_job_errors_total[5m])"
        }]
      },
      {
        "title": "P99 Duration",
        "targets": [{
          "expr": "histogram_quantile(0.99, rate(gocron_job_duration_seconds_bucket[5m]))"
        }]
      }
    ]
  }
}

See Also

  • Observability Guide - Overview
  • Lifecycle Monitoring Guide - Event monitoring
  • API: Monitoring Types - Interface reference

Install with Tessl CLI

npx tessl i tessl/golang-github-com-go-co-op-gocron-v2@2.19.1

docs

index.md

tile.json