A Golang job scheduling library that lets you run Go functions at pre-determined intervals using cron expressions, fixed durations, daily, weekly, monthly, or one-time schedules with support for distributed deployments.
NTP, time alignment, and best practices for distributed deployments.
Clock synchronization is critical for distributed locking to work fairly. When clocks are out of sync, one instance may consistently acquire locks because it schedules jobs earlier than others.
Key principle: All instances should agree on "what time is it now" within a small margin (ideally < 1 second).
Instance A clock: 08:59:55
Instance B clock: 09:00:00
Job scheduled for 09:00:00:
- Instance B schedules at 09:00:00 → tries to acquire lock
- Instance A still thinks it's 08:59:55 → doesn't schedule yet
- Instance B always wins

Both instances: 09:00:00 (±100ms)
Job scheduled for 09:00:00:
- Both instances schedule at ~09:00:00
- Race to acquire lock (fair competition)
- Either instance can win# Install NTP
sudo apt-get update
sudo apt-get install ntp
# Start NTP service
sudo systemctl start ntp
sudo systemctl enable ntp
# Verify synchronization
ntpq -pOutput:
remote refid st t when poll reach delay offset jitter
==============================================================================
*ntp.ubuntu.com .GPS. 1 u 64 128 377 1.234 0.567 0.123

A leading `*` marks the server currently used for synchronization; the offset column (in milliseconds) should be < 100ms.
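Containers share the host kernel's clock, so keeping the host synchronized is what keeps container time accurate. Mount the host's timezone files so containers also agree on local time: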
# docker-compose.yml
version: '3'
services:
app:
image: myapp
volumes:
- /etc/localtime:/etc/localtime:ro
- /etc/timezone:/etc/timezone:roOr use NTP in container:
FROM golang:1.21
RUN apt-get update && apt-get install -y ntp
CMD ["ntpd", "-n"]Use NTP DaemonSet or host time:
apiVersion: v1
kind: Pod
metadata:
name: gocron-worker
spec:
containers:
- name: app
image: myapp
volumeMounts:
- name: tz-config
mountPath: /etc/localtime
readOnly: true
volumes:
- name: tz-config
hostPath:
path: /etc/localtimeMost cloud providers sync time automatically:
- AWS: Amazon Time Sync Service (169.254.169.123)
- GCP: Metadata server NTP
- Azure: Windows Time service
Verify with:
# Linux
timedatectl status
# Should show:
# Network time on: yes
# NTP synchronized: yes# Linux
ntpq -p
# Look for:
# - offset column (should be < 100ms)
# - * prefix on one server (indicates sync)# On each instance
date +"%Y-%m-%d %H:%M:%S.%3N"
# Compare outputs:
# Instance A: 2024-01-15 09:00:00.123
# Instance B: 2024-01-15 09:00:00.156
# Diff: 33ms (acceptable)func checkClockSkew(instances []string) error {
times := make([]time.Time, len(instances))
for i, instance := range instances {
t, err := fetchRemoteTime(instance)
if err != nil {
return err
}
times[i] = t
}
// Find max difference
var maxSkew time.Duration
for i := 0; i < len(times); i++ {
for j := i + 1; j < len(times); j++ {
skew := times[i].Sub(times[j])
if skew < 0 {
skew = -skew
}
if skew > maxSkew {
maxSkew = skew
}
}
}
log.Printf("Max clock skew: %v", maxSkew)
if maxSkew > time.Second {
return fmt.Errorf("clock skew too high: %v", maxSkew)
}
return nil
}Without alignment, instances with slight clock differences schedule at different times:
// BAD: Each instance schedules based on current time
j, _ := s.NewJob(
gocron.DurationJob(5*time.Minute),
gocron.NewTask(myFunc),
)
// Instance A: starts at 09:00:03
// Instance B: starts at 09:00:01
// Result: B always wins lockSolution: Align to time boundaries:
// GOOD: All instances align to same time boundary
now := time.Now()
next5Min := now.Truncate(5*time.Minute).Add(5*time.Minute)
j, _ := s.NewJob(
gocron.DurationJob(5*time.Minute),
gocron.NewTask(myFunc),
gocron.WithStartAt(gocron.WithStartDateTime(next5Min)),
)
// All instances: start at 09:05:00 (within ms of each other)
// Result: Fair lock competition// Top of every minute
nextMinute := time.Now().Truncate(time.Minute).Add(time.Minute)
// Top of every 5 minutes
next5Min := time.Now().Truncate(5*time.Minute).Add(5*time.Minute)
// Top of every hour
nextHour := time.Now().Truncate(time.Hour).Add(time.Hour)
// Top of every day (midnight UTC: Truncate rounds absolute time, so this is not local midnight)
nextDay := time.Now().Truncate(24*time.Hour).Add(24*time.Hour)func alignToInterval(interval time.Duration) time.Time {
now := time.Now()
return now.Truncate(interval).Add(interval)
}
// Usage
j, _ := s.NewJob(
gocron.DurationJob(5*time.Minute),
gocron.NewTask(myFunc),
gocron.WithStartAt(gocron.WithStartDateTime(alignToInterval(5*time.Minute))),
)Cron schedules naturally align to clock time:
// Runs at top of every hour (all instances aligned)
j, _ := s.NewJob(
gocron.CronJob("0 * * * *", false),
gocron.NewTask(myFunc),
)No additional alignment needed.
These also align naturally:
// Runs every day at 9 AM (all instances aligned)
j, _ := s.NewJob(
gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(9, 0, 0))),
gocron.NewTask(myFunc),
)Note: Requires synchronized clocks to ensure all instances agree on "9 AM".
| Skew | Impact | Recommendation |
|---|---|---|
| < 100ms | Negligible | Ideal |
| 100ms - 1s | Minor impact | Acceptable |
| 1s - 5s | Noticeable unfairness | Fix soon |
| > 5s | Severe unfairness | Fix immediately |
| Job Interval | Max Acceptable Skew |
|---|---|
| Every second | 100ms |
| Every minute | 1s |
| Every 5 minutes | 5s |
| Every hour | 1 minute |
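Rule of thumb: Keep skew within roughly 1–2% of the job interval; for very short intervals (every second), the 100ms limit above (10%) is the practical bound.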
// clockMonitor compares the local clock against a set of peer instances and
// reports every peer whose skew exceeds threshold.
type clockMonitor struct {
	// instances are the peers to query; each entry is passed as-is to
	// fetchRemoteTime (e.g. "instance-a:8080").
	instances []string
	// threshold is the skew above which a peer is logged and recorded in metrics.
	threshold time.Duration
}

// checkSkew queries every configured instance for its current time and
// compares it with the local clock.
//
// A peer whose time cannot be fetched is logged and skipped. A peer whose
// absolute skew exceeds m.threshold is logged and recorded in metrics; a skew
// above five seconds additionally raises an alert.
//
// It returns ctx.Err() if ctx is cancelled before all peers were checked, and
// nil otherwise.
func (m *clockMonitor) checkSkew(ctx context.Context) error {
	for _, instance := range m.instances {
		// Stop early instead of issuing requests that are bound to fail.
		if err := ctx.Err(); err != nil {
			return err
		}

		sentAt := time.Now()
		remoteTime, err := fetchRemoteTime(ctx, instance)
		if err != nil {
			log.Printf("Failed to fetch time from %s: %v", instance, err)
			continue
		}

		// Sample the local clock per request, at the midpoint of the round
		// trip. A single timestamp taken before the loop would count the
		// latency of every earlier fetch as skew for later peers.
		localTime := sentAt.Add(time.Since(sentAt) / 2)

		skew := localTime.Sub(remoteTime)
		if skew < 0 {
			skew = -skew
		}
		if skew <= m.threshold {
			continue
		}

		log.Printf("High clock skew with %s: %v", instance, skew)
		metrics.RecordClockSkew(instance, skew.Seconds())
		if skew > 5*time.Second {
			alerting.SendAlert("Critical clock skew", fmt.Sprintf("%s: %v", instance, skew))
		}
	}
	return nil
}
// Run periodically
go func() {
ticker := time.NewTicker(5 * time.Minute)
defer ticker.Stop()
monitor := &clockMonitor{
instances: []string{"instance-a:8080", "instance-b:8080"},
threshold: time.Second,
}
for range ticker.C {
monitor.checkSkew(context.Background())
}
}()# Create monitoring script
# Write the NTP health-check script. The quoted 'EOF' delimiter keeps the
# body literal, so nothing is expanded while the file is being written.
cat > /usr/local/bin/check-ntp.sh <<'EOF'
#!/bin/bash
# Exit non-zero when NTP is unsynchronized or the offset exceeds 100ms.

# The peer selected for synchronization is prefixed with '*';
# column 9 of the peers table is its offset in milliseconds.
OFFSET=$(ntpq -c peers | awk '/^\*/ {print $9}')
if [ -z "$OFFSET" ]; then
    echo "NTP not synchronized"
    exit 1
fi
# Convert to absolute value
OFFSET=${OFFSET#-}
# Check if offset > 100ms
if (( $(echo "$OFFSET > 100" | bc -l) )); then
    echo "NTP offset too high: ${OFFSET}ms"
    exit 1
fi
echo "NTP offset OK: ${OFFSET}ms"
EOF
chmod +x /usr/local/bin/check-ntp.sh
# Add to cron. `crontab -` replaces the whole crontab, so merge with the
# existing entries (dropping any previous check-ntp.sh line to stay idempotent)
# instead of piping in only the new line.
( crontab -l 2>/dev/null | grep -vF '/usr/local/bin/check-ntp.sh'; echo "*/5 * * * * /usr/local/bin/check-ntp.sh" ) | crontab -
# Install and enable NTP on all instances
sudo apt-get install ntp
sudo systemctl enable ntp// For distributed deployments
j, _ := s.NewJob(
gocron.DurationJob(5*time.Minute),
gocron.NewTask(myFunc),
gocron.WithStartAt(gocron.WithStartDateTime(alignToInterval(5*time.Minute))),
)go func() {
ticker := time.NewTicker(5 * time.Minute)
defer ticker.Stop()
for range ticker.C {
if err := checkClockSkew(instances); err != nil {
log.Printf("Clock skew detected: %v", err)
metrics.RecordClockSkewError()
}
}
}()// Prefer this (naturally aligned)
j, _ := s.NewJob(
gocron.CronJob("*/5 * * * *", false),
gocron.NewTask(myFunc),
)
// Over this (requires manual alignment)
j, _ := s.NewJob(
gocron.DurationJob(5*time.Minute),
gocron.NewTask(myFunc),
gocron.WithStartAt(gocron.WithStartDateTime(alignToInterval(5*time.Minute))),
)loc, _ := time.LoadLocation("America/New_York")
s, _ := gocron.NewScheduler(
gocron.WithLocation(loc),
)Ensures all instances use the same timezone.
// In tests, simulate clock skew
// skewedClock is a test clock that reports the system time shifted by a fixed
// offset: a positive offset yields times ahead of the real clock, a negative
// offset yields times behind it.
type skewedClock struct {
	offset time.Duration // added to the real time on every Now call
}

// Now returns the current system time displaced by the configured offset.
func (sc *skewedClock) Now() time.Time {
	realNow := time.Now()
	return realNow.Add(sc.offset)
}
// Test with 5-second skew
clock := &skewedClock{offset: 5 * time.Second}
// ... test distributed locking behaviorSymptom: Same instance consistently acquires all locks.
Check clock skew:
# On each instance
date +"%Y-%m-%d %H:%M:%S.%3N"
# Compare outputsSolutions:
Symptom: Jobs run at unpredictable times across instances.
Cause: Clock drift causing different schedule calculations.
Solutions:
Symptom: No instance acquires locks.
Cause: Severe clock skew (> 1 minute) causing all instances to miss lock window.
Solutions:
sudo ntpdate -s time.nist.gov
sudo systemctl restart ntp

Install with Tessl CLI
npx tessl i tessl/golang-github-com-go-co-op-gocron-v2@2.19.1

docs
api
examples
guides