Gelly: Flink Graph API - A comprehensive graph processing library for Apache Flink
—
Gelly provides comprehensive graph generation utilities for creating synthetic graphs for testing, benchmarking, and algorithm development. The generators support various graph topologies and probability distributions, enabling creation of graphs with specific structural properties.
All graph generators extend the abstract base class providing common functionality.
public abstract class AbstractGraphGenerator<K, VV, EV> {
public abstract Graph<K, VV, EV> generate() throws Exception;
public AbstractGraphGenerator<K, VV, EV> setParallelism(int parallelism)
protected ExecutionEnvironment getExecutionEnvironment()
}Common utilities and helper functions for graph generation.
public class GraphGeneratorUtils {
// Utility methods for graph generation
public static <T> DataSet<T> parallelizeArray(ExecutionEnvironment env, T[] array, int parallelism)
// Additional utility methods
}Generate complete graphs where every vertex is connected to every other vertex.
public class CompleteGraph<K, VV, EV> extends AbstractGraphGenerator<K, VV, EV> {
public CompleteGraph(ExecutionEnvironment env, long numVertices)
}Usage Example:
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// Create complete graph with 10 vertices
CompleteGraph<LongValue, NullValue, NullValue> completeGen =
new CompleteGraph<>(env, 10L);
Graph<LongValue, NullValue, NullValue> complete = completeGen.generate();
// Complete graph properties:
// - 10 vertices (0, 1, 2, ..., 9)
// - 45 edges (n*(n-1)/2 for undirected)
// - Every vertex connected to every other vertex
System.out.println("Vertices: " + complete.numberOfVertices()); // 10
System.out.println("Edges: " + complete.numberOfEdges()); // 45Generate graphs with vertices but no edges.
public class EmptyGraph<K, VV, EV> extends AbstractGraphGenerator<K, VV, EV> {
public EmptyGraph(ExecutionEnvironment env, long numVertices)
}Usage Example:
// Create empty graph with 5 vertices and no edges
EmptyGraph<LongValue, NullValue, NullValue> emptyGen =
new EmptyGraph<>(env, 5L);
Graph<LongValue, NullValue, NullValue> empty = emptyGen.generate();
System.out.println("Vertices: " + empty.numberOfVertices()); // 5
System.out.println("Edges: " + empty.numberOfEdges()); // 0Generate linear path graphs where vertices form a single chain.
public class PathGraph<K, VV, EV> extends AbstractGraphGenerator<K, VV, EV> {
public PathGraph(ExecutionEnvironment env, long numVertices)
}Usage Example:
// Create path graph: 0-1-2-3-4
PathGraph<LongValue, NullValue, NullValue> pathGen =
new PathGraph<>(env, 5L);
Graph<LongValue, NullValue, NullValue> path = pathGen.generate();
// Path graph properties:
// - 5 vertices: 0, 1, 2, 3, 4
// - 4 edges: (0,1), (1,2), (2,3), (3,4)
// - Forms a single linear chain
System.out.println("Vertices: " + path.numberOfVertices()); // 5
System.out.println("Edges: " + path.numberOfEdges()); // 4Generate n-dimensional hypercube graphs.
public class HypercubeGraph<K, VV, EV> extends AbstractGraphGenerator<K, VV, EV> {
public HypercubeGraph(ExecutionEnvironment env, long dimensions)
}Usage Example:
// Create 3-dimensional hypercube (cube)
HypercubeGraph<LongValue, NullValue, NullValue> cubeGen =
new HypercubeGraph<>(env, 3L);
Graph<LongValue, NullValue, NullValue> cube = cubeGen.generate();
// 3D hypercube properties:
// - 8 vertices (2^3)
// - 12 edges (3 * 2^2)
// - Each vertex connected to 3 neighbors (differs in 1 bit)
System.out.println("Vertices: " + cube.numberOfVertices()); // 8
System.out.println("Edges: " + cube.numberOfEdges()); // 12Generate graphs with exactly one edge between two vertices.
public class SingletonEdgeGraph<K, VV, EV> extends AbstractGraphGenerator<K, VV, EV> {
public SingletonEdgeGraph(ExecutionEnvironment env, K source, K target, EV edgeValue)
}Usage Example:
// Create graph with single edge from vertex 1 to vertex 2
SingletonEdgeGraph<LongValue, NullValue, DoubleValue> singletonGen =
new SingletonEdgeGraph<>(env,
new LongValue(1L),
new LongValue(2L),
new DoubleValue(1.5));
Graph<LongValue, NullValue, DoubleValue> singleton = singletonGen.generate();
System.out.println("Vertices: " + singleton.numberOfVertices()); // 2
System.out.println("Edges: " + singleton.numberOfEdges()); // 1Generate echo graphs where each vertex has a self-loop.
public class EchoGraph<K, VV, EV> extends AbstractGraphGenerator<K, VV, EV> {
public EchoGraph(ExecutionEnvironment env, long numVertices)
}Usage Example:
// Create echo graph with self-loops
EchoGraph<LongValue, NullValue, NullValue> echoGen =
new EchoGraph<>(env, 4L);
Graph<LongValue, NullValue, NullValue> echo = echoGen.generate();
// Echo graph properties:
// - 4 vertices: 0, 1, 2, 3
// - 4 self-loop edges: (0,0), (1,1), (2,2), (3,3)
System.out.println("Vertices: " + echo.numberOfVertices()); // 4
System.out.println("Edges: " + echo.numberOfEdges()); // 4Generators for creating random graphs with various probability distributions and structural properties.
The org.apache.flink.graph.generator.random package contains various random graph generators:
// Example random generators (specific classes may vary)
public class ErdosRenyiGraph<K, VV, EV> extends AbstractGraphGenerator<K, VV, EV>
public class BarabasiAlbertGraph<K, VV, EV> extends AbstractGraphGenerator<K, VV, EV>
public class WattsStrogatzGraph<K, VV, EV> extends AbstractGraphGenerator<K, VV, EV>
public class RMatGraph<K, VV, EV> extends AbstractGraphGenerator<K, VV, EV>Generate random graphs where each possible edge exists with a given probability.
Usage Example:
// Create Erdős–Rényi graph with 100 vertices and edge probability 0.1
ErdosRenyiGraph<LongValue, NullValue, NullValue> erGen =
new ErdosRenyiGraph<>(env, 100L, 0.1);
Graph<LongValue, NullValue, NullValue> randomGraph = erGen.generate();
// Expected properties:
// - 100 vertices
// - ~495 edges (binomial distribution around n*(n-1)/2 * p)
// - Random connectivity structureGenerate scale-free networks using various models.
Usage Example:
// Create Barabási–Albert scale-free network
BarabasiAlbertGraph<LongValue, NullValue, NullValue> baGen =
new BarabasiAlbertGraph<>(env, 1000L, 3); // 1000 vertices, 3 edges per new vertex
Graph<LongValue, NullValue, NullValue> scaleFreee = baGen.generate();
// Scale-free properties:
// - Power-law degree distribution
// - Hub vertices with very high degree
// - Most vertices with low degreeGenerators can be configured with custom vertex and edge value initializers.
Usage Example:
// Complete graph with custom vertex values
CompleteGraph<LongValue, StringValue, DoubleValue> customGen =
new CompleteGraph<LongValue, StringValue, DoubleValue>(env, 5L) {
@Override
protected StringValue getVertexValue(LongValue vertexId) {
return new StringValue("vertex_" + vertexId.getValue());
}
@Override
protected DoubleValue getEdgeValue(LongValue source, LongValue target) {
return new DoubleValue(Math.random());
}
};
Graph<LongValue, StringValue, DoubleValue> customGraph = customGen.generate();Set parallelism for distributed graph generation.
Usage Example:
// Generate large complete graph with high parallelism
CompleteGraph<LongValue, NullValue, NullValue> largeGen =
new CompleteGraph<>(env, 10000L);
largeGen.setParallelism(16); // Use 16 parallel tasks
Graph<LongValue, NullValue, NullValue> largeGraph = largeGen.generate();Combine multiple generators to create complex graph structures.
Usage Example:
// Create composite graph structure
Graph<LongValue, NullValue, NullValue> path = new PathGraph<>(env, 5L).generate();
Graph<LongValue, NullValue, NullValue> complete = new CompleteGraph<>(env, 3L).generate();
// Translate IDs to avoid conflicts
Graph<LongValue, NullValue, NullValue> translatedComplete = complete
.translateGraphIds(new LongValueAddOffset(new LongValue(10L)));
// Union the graphs
Graph<LongValue, NullValue, NullValue> composite = path.union(translatedComplete);
System.out.println("Composite vertices: " + composite.numberOfVertices()); // 8
System.out.println("Composite edges: " + composite.numberOfEdges()); // 7Large graph generation can be memory-intensive:
// Configure memory for large graph generation
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.getConfig().setTaskManagerMemory(4096); // 4GB
// Generate large graph
CompleteGraph<LongValue, NullValue, NullValue> gen =
new CompleteGraph<>(env, 50000L); // Will create ~1.25B edges
gen.setParallelism(32); // High parallelism for distributionChoose appropriate data types for performance:
// Efficient types for large graphs
Graph<LongValue, NullValue, NullValue> efficientGraph = /* generator */;
// Less efficient for large-scale generation
Graph<String, ComplexObject, ComplexEdgeValue> inefficientGraph = /* generator */;// Efficient: Generate once, reuse
CompleteGraph<LongValue, NullValue, NullValue> generator = new CompleteGraph<>(env, 1000L);
Graph<LongValue, NullValue, NullValue> baseGraph = generator.generate();
// Create variations
Graph<LongValue, StringValue, NullValue> variation1 = baseGraph.mapVertices(v -> new StringValue("v" + v.getId()));
Graph<LongValue, NullValue, DoubleValue> variation2 = baseGraph.mapEdges(e -> new DoubleValue(1.0));
// Inefficient: Multiple generation calls
// CompleteGraph<LongValue, NullValue, NullValue> gen1 = new CompleteGraph<>(env, 1000L);
// CompleteGraph<LongValue, NullValue, NullValue> gen2 = new CompleteGraph<>(env, 1000L);
// Graph<LongValue, NullValue, NullValue> graph1 = gen1.generate();
// Graph<LongValue, NullValue, NullValue> graph2 = gen2.generate();Install with Tessl CLI
npx tessl i tessl/maven-org-apache-flink--flink-gelly-2-10