Batch loading system for preventing N+1 queries and optimizing database access patterns in GraphQL Ruby.
The main DataLoader class that coordinates batch loading and caching.
class GraphQL::Dataloader
def initialize
# Source management
def with(source_class, *args)
def get_source(source_class, *args)
# Execution
def yield
def run
def run_isolated
# State management
def cleanup
def append_job(&block)
end

Usage Examples:
# Enable DataLoader in schema
# Example schema with DataLoader support switched on.
class MySchema < GraphQL::Schema
# Root type for queries.
query QueryType
# Installs GraphQL::Dataloader as a schema plugin.
use GraphQL::Dataloader
end
# Using DataLoader in resolvers
# Object type for users whose `posts` field is resolved through the dataloader.
class UserType < GraphQL::Schema::Object
  field :posts, [PostType], null: true

  # Batch-loads the posts that belong to this user.
  #
  # @return [Array<Post>, nil]
  def posts
    # Arguments to `with` configure the source itself; the per-object key
    # goes to `load`. Passing the id to `with` would give each user its own
    # source and defeat batching.
    dataloader.with(PostsByUserLoader).load(object.id)
  end
end
# Manual DataLoader usage
dataloader = GraphQL::Dataloader.new
user_loader = dataloader.with(UserLoader)
# Load single item
user = user_loader.load("123")
# Load multiple items
users = user_loader.load_many(["123", "456", "789"])
# Execute all pending loads
results = dataloader.run do
[user, users]
end

Base class for implementing custom batch loading logic.
class GraphQL::Dataloader::Source
def initialize
# Must implement: batch fetch logic
def fetch(keys)
# Loading interface
def load(key)
def load_many(keys)
# Caching
def cache_key_for(key)
def results
# Utilities
def pending?(key)
def loaded?(key)
end

Usage Examples:
# Basic user loader
# Loads users by primary key with a single query per batch.
class UserLoader < GraphQL::Dataloader::Source
  # @param user_ids [Array] ids requested in the current batch
  # @return [Array<User, nil>] one entry per requested id, in request order
  def fetch(user_ids)
    # Single database query for all requested users
    users_by_id = User.where(id: user_ids).index_by(&:id)

    # Dataloader pairs results with keys by position, so return an Array
    # aligned with `user_ids` (nil where no user was found), not a Hash.
    user_ids.map { |id| users_by_id[id] }
  end
end
# Posts by user loader with ordering
# Loads each user's posts in one query, sorted by a configurable column.
class PostsByUserLoader < GraphQL::Dataloader::Source
  # @param order_by [Symbol] column used to order the posts
  def initialize(order_by: :created_at)
    @order_by = order_by
    super()
  end

  # @param user_ids [Array] user ids requested in the current batch
  # @return [Array<Array<Post>>] posts per requested user, in request order
  def fetch(user_ids)
    posts_by_user = Post.where(user_id: user_ids).order(@order_by).group_by(&:user_id)

    # Results are matched to keys by position; users without posts get [].
    user_ids.map { |id| posts_by_user.fetch(id, []) }
  end

  # Include order_by in the cache key so different sort orders stay separate.
  def cache_key_for(user_id)
    "#{user_id}:#{@order_by}"
  end
end
# Association loader with conditions
# Loads an association for many parent records at once, optionally filtered
# by conditions on the associated table.
class AssociationLoader < GraphQL::Dataloader::Source
  # @param model_class [Class] parent model (e.g. User)
  # @param association_name [Symbol] association to load (e.g. :posts)
  # @param conditions [Hash] conditions applied to the associated records
  def initialize(model_class, association_name, conditions = {})
    @model_class = model_class
    @association_name = association_name
    @conditions = conditions
    super()
  end

  # @param object_ids [Array] parent ids requested in the current batch
  # @return [Array] the association value per requested id, in request order;
  #   nil when the parent is missing or has no associated rows matching
  #   the conditions
  def fetch(object_ids)
    scope = @model_class.where(id: object_ids).includes(@association_name)

    if @conditions.any?
      # Qualify the conditions with the association's table. An unqualified
      # `where(@conditions)` would filter on the parent table instead.
      association_table = @model_class.reflect_on_association(@association_name).klass.table_name
      scope = scope.where(association_table => @conditions)
    end

    parents_by_id = scope.index_by(&:id)

    # Return the associated records (not the parents), aligned with the keys.
    object_ids.map { |id| parents_by_id[id]&.public_send(@association_name) }
  end
end
# Count loader for efficient counting
# Counts child rows per parent with a single grouped query.
class CountLoader < GraphQL::Dataloader::Source
  # @param model_class [Class] model whose rows are counted (e.g. Post)
  # @param foreign_key [Symbol] column that references the parent (e.g. :user_id)
  def initialize(model_class, foreign_key)
    @model_class = model_class
    @foreign_key = foreign_key
    super()
  end

  # @param parent_ids [Array] parent ids requested in the current batch
  # @return [Array<Integer>] one count per requested id, in request order
  def fetch(parent_ids)
    counts = @model_class
      .where(@foreign_key => parent_ids)
      .group(@foreign_key)
      .count

    # Parents without rows are absent from the grouped result; default to 0.
    # An Array is returned because results are matched to keys by position.
    parent_ids.map { |id| counts.fetch(id, 0) }
  end
end
# Using custom loaders
class UserType < GraphQL::Schema::Object
field :posts, [PostType], null: true do
argument :order_by, String, required: false, default_value: "created_at"
end
field :post_count, Int, null: false
field :published_posts, [PostType], null: true
# Resolves the user's posts ordered by the requested column.
#
# @param order_by [String] name of a Post column to sort by
# @return the value loaded from PostsByUserLoader for this user
# @raise [GraphQL::ExecutionError] when order_by is not a Post column
def posts(order_by: "created_at")
  # order_by comes straight from the client, so only accept real column
  # names before symbolizing it and letting it reach ORDER BY.
  unless Post.column_names.include?(order_by.to_s)
    raise GraphQL::ExecutionError, "Invalid order_by: #{order_by}"
  end

  dataloader.with(PostsByUserLoader, order_by: order_by.to_sym).load(object.id)
end
# Number of posts written by this user, loaded through CountLoader.
def post_count
  counter = dataloader.with(CountLoader, Post, :user_id)
  counter.load(object.id)
end
# Posts of this user restricted to the :published status.
def published_posts
  published_only = { status: :published }
  loader = dataloader.with(AssociationLoader, User, :posts, published_only)
  loader.load(object.id)
end
end

Handle asynchronous loading with promise-like interface.
# DataLoader returns promise-like objects
class GraphQL::Dataloader::Request
def load
def load_many
def then(&block)
def sync
def fulfilled?
def rejected?
def value
def reason
end

Usage Examples:
# Chaining operations with then
# Loads the user, then that user's post count, and combines both into a hash.
def user_with_post_count
  user_request = dataloader.with(UserLoader).load(object.user_id)

  user_request.then do |user|
    count_request = dataloader.with(CountLoader, Post, :user_id).load(user.id)
    count_request.then { |count| { user: user, post_count: count } }
  end
end
# Loading dependent data
# Loads the author first, then the author's latest post, and returns both.
def author_with_latest_post
  dataloader.with(UserLoader).load(object.author_id).then do |author|
    dataloader.with(LatestPostLoader).load(author.id).then do |post|
      { user: author, latest_post: post }
    end
  end
end
# Error handling in promises
def safe_user_load
dataloader.with(UserLoader).load(object.user_id).then do |user|
user || { name: "Unknown User", email: nil }
rescue => error
context.add_error(GraphQL::ExecutionError.new("Failed to load user: #{error.message}"))
nil
end
end

Complex loading scenarios and performance optimization techniques.
# Nested loading sources
class NestedLoader < GraphQL::Dataloader::Source
# Remembers which source loads the parents and which attribute of a parent
# holds the nested keys.
#
# @param source_class [Class] source used to load the parent objects
# @param nested_key [Symbol] parent method returning the nested ids
def initialize(source_class, nested_key)
  @source_class, @nested_key = source_class, nested_key
  super()
end
# Loads the parents for `keys`, then the objects their nested ids point to.
#
# @param keys [Array] parent keys requested in the current batch
# @return [Array<Array>] nested objects per requested key, in request order
def fetch(keys)
  # Load parent objects first
  parents = dataloader.with(@source_class).load_many(keys).sync

  # Read each parent's nested ids once. public_send keeps the lookup on the
  # parent's public interface.
  nested_ids_by_position = parents.map { |parent| parent&.public_send(@nested_key) }
  nested_keys = nested_ids_by_position.flat_map { |ids| ids || [] }.compact.uniq

  # Load nested objects; drop misses so `obj.id` below cannot hit nil.
  nested_objects = dataloader.with(RelatedLoader).load_many(nested_keys).sync.compact

  # Parents come back in key order, so walking them by position replaces the
  # quadratic `keys.index(key)` lookup and yields an Array aligned with
  # `keys`, which is how results are matched to keys.
  nested_ids_by_position.map do |ids|
    ids ? nested_objects.select { |obj| ids.include?(obj.id) } : []
  end
end
end

Usage Examples:
# Conditional loading based on permissions
# Loads posts by id, restricted to what the current user may see.
class SecurePostLoader < GraphQL::Dataloader::Source
  # @param current_user [Object, nil] viewer; nil for anonymous requests
  def initialize(current_user)
    @current_user = current_user
    super()
  end

  # @param post_ids [Array] post ids requested in the current batch
  # @return [Array<Post, nil>] one entry per requested id, in request order;
  #   nil for posts that do not exist or are not visible to the viewer
  def fetch(post_ids)
    visible_posts = visible_scope.where(id: post_ids).index_by(&:id)

    # Results are matched to keys by position, so return an aligned Array.
    post_ids.map { |id| visible_posts[id] }
  end

  # Include the viewer's role in the cache key.
  def cache_key_for(post_id)
    "#{post_id}:#{viewer_role}"
  end

  private

  # Admins see everything, signed-in users see published posts, and guests
  # see published posts that are not private.
  def visible_scope
    return Post.all if @current_user&.admin?

    published = Post.where(published: true)
    @current_user ? published : published.where.not(private: true)
  end

  # "admin", "user" or "guest" depending on the current user.
  def viewer_role
    return "guest" unless @current_user

    @current_user.admin? ? "admin" : "user"
  end
end
# Multi-level caching loader
# Loads users through the Rails cache, falling back to the database for
# cache misses and writing those back to the cache.
class CachedUserLoader < GraphQL::Dataloader::Source
  # @param user_ids [Array] ids requested in the current batch
  # @return [Array<User, nil>] one entry per requested id, in request order
  def fetch(user_ids)
    users_by_id = read_cached(user_ids)

    missing_ids = user_ids - users_by_id.keys
    users_by_id.merge!(load_and_cache(missing_ids)) if missing_ids.any?

    # Results are matched to keys by position, so return an aligned Array.
    user_ids.map { |id| users_by_id[id] }
  end

  private

  def cache_key(id)
    "user:#{id}"
  end

  # Reads every requested user with one multi-get instead of one cache
  # round trip per id. Only cache hits appear in the returned Hash.
  def read_cached(user_ids)
    hits = Rails.cache.read_multi(*user_ids.map { |id| cache_key(id) })

    user_ids.each_with_object({}) do |id, found|
      user = hits[cache_key(id)]
      found[id] = user if user
    end
  end

  # Loads the given ids from the database and caches them for one hour.
  def load_and_cache(ids)
    fresh_users = User.where(id: ids).index_by(&:id)
    unless fresh_users.empty?
      Rails.cache.write_multi(
        fresh_users.transform_keys { |id| cache_key(id) },
        expires_in: 1.hour
      )
    end
    fresh_users
  end
end
# Aggregation loader for statistics
# Computes post/comment statistics for many users with one aggregate query.
class UserStatsLoader < GraphQL::Dataloader::Source
  # @param user_ids [Array] ids requested in the current batch
  # @return [Array<Hash, nil>] stats per requested id, in request order;
  #   nil only when no user row exists for the id
  def fetch(user_ids)
    # LEFT joins keep users that have no posts or no comments. An INNER
    # join would drop them and leave the non-null `statistics` field empty.
    rows = User.left_joins(:posts, :comments)
      .where(id: user_ids)
      .group(:id)
      .select(
        'users.id',
        'COUNT(DISTINCT posts.id) as post_count',
        'COUNT(DISTINCT comments.id) as comment_count',
        'AVG(posts.like_count) as avg_likes'
      )

    stats_by_user = rows.each_with_object({}) do |row, stats|
      stats[row.id] = {
        post_count: row.post_count,
        comment_count: row.comment_count,
        # AVG is NULL for users without posts.
        avg_likes: row.avg_likes&.to_f || 0.0
      }
    end

    # Results are matched to keys by position, so return an aligned Array.
    user_ids.map { |id| stats_by_user[id] }
  end
end
# Using advanced patterns
class UserType < GraphQL::Schema::Object
field :secure_posts, [PostType], null: true
field :cached_profile, UserProfileType, null: true
field :statistics, UserStatsType, null: false
# Posts of this object filtered by what the current user may see.
def secure_posts
  viewer = context[:current_user]
  post_loader = dataloader.with(SecurePostLoader, viewer)
  post_loader.load_many(object.post_ids)
end
# Profile for this object, loaded through CachedUserLoader.
def cached_profile
  profile_loader = dataloader.with(CachedUserLoader)
  profile_loader.load(object.id)
end
# Aggregated statistics for this object, loaded through UserStatsLoader.
def statistics
  stats_loader = dataloader.with(UserStatsLoader)
  stats_loader.load(object.id)
end
end

Tools and techniques for monitoring and optimizing DataLoader performance.
# Schema configuration for performance
# Example schema that installs DataLoader and registers three query
# analyzers: the complexity and depth analyzers plus DataLoaderAnalyzer.
class MySchema < GraphQL::Schema
use GraphQL::Dataloader
# Enable query analysis
query_analyzer GraphQL::Analysis::QueryComplexity
query_analyzer GraphQL::Analysis::QueryDepth
# Custom analyzer for DataLoader metrics
query_analyzer DataLoaderAnalyzer
end
# Custom performance analyzer
class DataLoaderAnalyzer < GraphQL::Analysis::Analyzer
# Starts the analyzer with zeroed counters.
#
# @param query the query handed to the parent analyzer
def initialize(query)
  super(query)
  @batch_sizes = []
  @dataloader_calls = 0
end
# Counts the field when its definition is reported as using DataLoader.
def on_enter_field(node, parent, visitor)
  # Track potential DataLoader usage
  @dataloader_calls += 1 if field_uses_dataloader?(visitor.field_definition)
end
# Summary of the analysis: the counted DataLoader calls plus the value of
# estimate_query_count.
def result
  summary = { dataloader_calls: @dataloader_calls }
  summary[:estimated_queries] = estimate_query_count
  summary
end
end

Usage Examples:
# Performance monitoring in resolvers
# Loads this object's posts and logs how long the load took.
#
# @return the value loaded from PostsByUserLoader
def monitored_posts
  # Measure with the monotonic clock: wall-clock time can jump (NTP, DST)
  # and produce negative or inflated durations.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  result = dataloader.with(PostsByUserLoader).load(object.id)
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

  # Log performance metrics
  Rails.logger.info "DataLoader fetch took #{elapsed}s"
  result
end
# Batch size optimization
# Loads posts per user while keeping each SQL query to a bounded number
# of ids.
class OptimizedPostLoader < GraphQL::Dataloader::Source
  MAX_BATCH_SIZE = 100

  # @param user_ids [Array] user ids requested in the current batch
  # @return [Array<Array<Post>>] posts per requested user, in request order
  def fetch(user_ids)
    if user_ids.size > MAX_BATCH_SIZE
      Rails.logger.warn "Large DataLoader batch: #{user_ids.size} users"
    end

    # Query in slices rather than truncating: `first(MAX_BATCH_SIZE)` would
    # silently drop every user past the limit.
    posts_by_user = user_ids.each_slice(MAX_BATCH_SIZE).each_with_object({}) do |slice, grouped|
      grouped.merge!(
        Post.where(user_id: slice).includes(:user, :tags).group_by(&:user_id)
      )
    end

    # Results are matched to keys by position; users without posts get [].
    user_ids.map { |id| posts_by_user.fetch(id, []) }
  end
end
# Memory usage monitoring
# Source that warns when one fetch grows the process's resident memory by
# more than 100 megabytes.
class MemoryAwareLoader < GraphQL::Dataloader::Source
  # Runs the batch operation and compares memory usage before and after.
  def fetch(keys)
    usage_before = get_memory_usage
    result = expensive_database_operation(keys)
    memory_used = get_memory_usage - usage_before

    Rails.logger.warn "High memory usage in DataLoader: #{memory_used / 1.megabyte}MB" if memory_used > 100.megabytes

    result
  end

  private

  # Resident set size of the current process as reported by `ps`, which
  # prints kilobytes.
  def get_memory_usage
    rss_kilobytes = `ps -o rss= -p #{Process.pid}`.to_i
    rss_kilobytes.kilobytes
  end
end
# Query complexity tracking
# Logs the complexity and DataLoader batch count of an executed query.
class QueryComplexityTracker
  class << self
    # Logs one info line per query and a warning when complexity exceeds 1000.
    #
    # @param query object exposing `context`
    # @param result hash-like result; complexity is read from
    #   result["extensions"]["complexity"] and defaults to 0
    def track_query(query, result)
      complexity = result.dig("extensions", "complexity") || 0
      batch_total = count_dataloader_batches(query.context)

      Rails.logger.info "Query complexity: #{complexity}, DataLoader batches: #{batch_total}"

      # Alert on high complexity
      Rails.logger.warn "High complexity query detected: #{complexity}" if complexity > 1000
    end

    # Sums `batch_count` over the sources of context[:dataloader];
    # 0 when the context has no dataloader.
    def count_dataloader_batches(context)
      loader = context[:dataloader]
      loader ? loader.sources.sum(&:batch_count) : 0
    end
  end
end