Comprehensive functionality for creating graphs from various data sources and managing graph structure through CRUD operations.
Create graphs from different data sources including DataSets, Collections, CSV files, and various tuple formats.
/**
* Creates a Graph from a DataSet of vertices and a DataSet of edges.
*/
def fromDataSet[K, VV, EV](vertices: DataSet[Vertex[K, VV]], edges: DataSet[Edge[K, EV]],
env: ExecutionEnvironment): Graph[K, VV, EV]
/**
* Creates a Graph from a DataSet of edges.
* Vertices are created automatically and their values are set to NullValue.
*/
def fromDataSet[K, EV](edges: DataSet[Edge[K, EV]],
env: ExecutionEnvironment): Graph[K, NullValue, EV]
/**
* Creates a graph from a DataSet of edges.
* Vertices are created automatically and their values are set by applying the provided
* vertexValueInitializer map function to the vertex ids.
*/
def fromDataSet[K, VV, EV](edges: DataSet[Edge[K, EV]],
vertexValueInitializer: MapFunction[K, VV],
env: ExecutionEnvironment): Graph[K, VV, EV]

/**
* Creates a Graph from a Seq of vertices and a Seq of edges.
*/
def fromCollection[K, VV, EV](vertices: Seq[Vertex[K, VV]], edges: Seq[Edge[K, EV]],
env: ExecutionEnvironment): Graph[K, VV, EV]
/**
* Creates a Graph from a Seq of edges.
* Vertices are created automatically and their values are set to NullValue.
*/
def fromCollection[K, EV](edges: Seq[Edge[K, EV]],
env: ExecutionEnvironment): Graph[K, NullValue, EV]
/**
* Creates a graph from a Seq of edges.
* Vertices are created automatically and their values are set by applying the provided
* vertexValueInitializer map function to the vertex ids.
*/
def fromCollection[K, VV, EV](edges: Seq[Edge[K, EV]],
vertexValueInitializer: MapFunction[K, VV],
env: ExecutionEnvironment): Graph[K, VV, EV]

/**
* Creates a graph from DataSets of tuples for vertices and for edges.
* The first field of the Tuple2 vertex object will become the vertex ID
* and the second field will become the vertex value.
* The first field of the Tuple3 object for edges will become the source ID,
* the second field will become the target ID, and the third field will become
* the edge value.
*/
def fromTupleDataSet[K, VV, EV](vertices: DataSet[(K, VV)], edges: DataSet[(K, K, EV)],
env: ExecutionEnvironment): Graph[K, VV, EV]
/**
* Creates a Graph from a DataSet of Tuples representing the edges.
* Vertices are created automatically and their values are set to NullValue.
*/
def fromTupleDataSet[K, EV](edges: DataSet[(K, K, EV)],
env: ExecutionEnvironment): Graph[K, NullValue, EV]
/**
* Creates a Graph from a DataSet of Tuples representing the edges.
* Vertices are created automatically and their values are set by applying the provided
* vertexValueInitializer map function to the vertex ids.
*/
def fromTupleDataSet[K, VV, EV](edges: DataSet[(K, K, EV)],
vertexValueInitializer: MapFunction[K, VV],
env: ExecutionEnvironment): Graph[K, VV, EV]

/**
* Creates a Graph from a DataSet of Tuple2's representing the edges.
* The first field of the Tuple2 object for edges will become the source ID,
* the second field will become the target ID. The edge value will be set to NullValue.
* Vertices are created automatically and their values are set to NullValue.
*/
def fromTuple2DataSet[K](edges: DataSet[(K, K)],
env: ExecutionEnvironment): Graph[K, NullValue, NullValue]
/**
* Creates a Graph from a DataSet of Tuple2's representing the edges.
* The first field of the Tuple2 object for edges will become the source ID,
* the second field will become the target ID. The edge value will be set to NullValue.
* Vertices are created automatically and their values are set by applying the provided
* vertexValueInitializer map function to the vertex IDs.
*/
def fromTuple2DataSet[K, VV](edges: DataSet[(K, K)],
vertexValueInitializer: MapFunction[K, VV],
env: ExecutionEnvironment): Graph[K, VV, NullValue]

/**
* Creates a Graph from CSV files.
* Supports extensive configuration for parsing edges and optionally vertices.
* The edge value is read from the CSV file if EV is not of type NullValue.
* Otherwise the edge value is set to NullValue.
* If the vertex value type VV is specified (i.e. it is not NullValue), then the vertex values
* are read from the file specified by pathVertices.
*/
def fromCsvReader[K, VV, EV](
env: ExecutionEnvironment,
pathEdges: String,
pathVertices: String = null,
lineDelimiterVertices: String = "\n",
fieldDelimiterVertices: String = ",",
quoteCharacterVertices: Character = null,
ignoreFirstLineVertices: Boolean = false,
ignoreCommentsVertices: String = null,
lenientVertices: Boolean = false,
includedFieldsVertices: Array[Int] = null,
lineDelimiterEdges: String = "\n",
fieldDelimiterEdges: String = ",",
quoteCharacterEdges: Character = null,
ignoreFirstLineEdges: Boolean = false,
ignoreCommentsEdges: String = null,
lenientEdges: Boolean = false,
includedFieldsEdges: Array[Int] = null,
vertexValueInitializer: MapFunction[K, VV] = null): Graph[K, VV, EV]

Usage Examples:
import org.apache.flink.graph.scala._
import org.apache.flink.graph.{Edge, Vertex}
import org.apache.flink.api.scala._
val env = ExecutionEnvironment.getExecutionEnvironment
// From DataSets
val vertices = env.fromCollection(Seq(
new Vertex(1L, "Alice"),
new Vertex(2L, "Bob")
))
val edges = env.fromCollection(Seq(
new Edge(1L, 2L, 0.5)
))
val graph1 = Graph.fromDataSet(vertices, edges, env)
// From Collections
val vertexSeq = Seq(new Vertex(1L, "Alice"), new Vertex(2L, "Bob"))
val edgeSeq = Seq(new Edge(1L, 2L, 0.5))
val graph2 = Graph.fromCollection(vertexSeq, edgeSeq, env)
// From Tuples
val vertexTuples = env.fromCollection(Seq((1L, "Alice"), (2L, "Bob")))
val edgeTuples = env.fromCollection(Seq((1L, 2L, 0.5)))
val graph3 = Graph.fromTupleDataSet(vertexTuples, edgeTuples, env)
// From CSV
val graphFromCsv = Graph.fromCsvReader[Long, String, Double](
env,
pathEdges = "/path/to/edges.csv",
pathVertices = "/path/to/vertices.csv"
)

Retrieve graph components in various formats for analysis and processing.
/**
* @return the vertex DataSet.
*/
def getVertices(): DataSet[Vertex[K, VV]]
/**
* @return the edge DataSet.
*/
def getEdges(): DataSet[Edge[K, EV]]
/**
* @return the vertex DataSet as Tuple2.
*/
def getVerticesAsTuple2(): DataSet[(K, VV)]
/**
* @return the edge DataSet as Tuple3.
*/
def getEdgesAsTuple3(): DataSet[(K, K, EV)]
/**
* @return a DataSet of Triplets,
* consisting of (srcVertexId, trgVertexId, srcVertexValue, trgVertexValue, edgeValue)
*/
def getTriplets(): DataSet[Triplet[K, VV, EV]]
/**
* @return The IDs of the vertices as DataSet
*/
def getVertexIds(): DataSet[K]
/**
* @return The IDs of the edges as DataSet
*/
def getEdgeIds(): DataSet[(K, K)]

Add and remove vertices and edges from existing graphs.
/**
* Adds the input vertex to the graph. If the vertex already
* exists in the graph, it will not be added again.
*/
def addVertex(vertex: Vertex[K, VV]): Graph[K, VV, EV]
/**
* Adds the list of vertices, passed as input, to the graph.
* If the vertices already exist in the graph, they will not be added again.
*/
def addVertices(vertices: List[Vertex[K, VV]]): Graph[K, VV, EV]
/**
* Adds the given edge to the graph. If the source and target vertices do
* not exist in the graph, they will also be added.
*/
def addEdge(source: Vertex[K, VV], target: Vertex[K, VV], edgeValue: EV): Graph[K, VV, EV]
/**
* Adds the given list of edges to the graph.
* When adding an edge for a non-existing set of vertices,
* the edge is considered invalid and ignored.
*/
def addEdges(edges: List[Edge[K, EV]]): Graph[K, VV, EV]

/**
* Removes the given vertex and its edges from the graph.
*/
def removeVertex(vertex: Vertex[K, VV]): Graph[K, VV, EV]
/**
* Removes the given list of vertices and their edges from the graph.
*/
def removeVertices(vertices: List[Vertex[K, VV]]): Graph[K, VV, EV]
/**
* Removes all edges that match the given edge from the graph.
*/
def removeEdge(edge: Edge[K, EV]): Graph[K, VV, EV]
/**
* Removes all the edges that match the edges in the given list from the graph.
*/
def removeEdges(edges: List[Edge[K, EV]]): Graph[K, VV, EV]

Perform mathematical set operations between graphs.
/**
* Performs union on the vertex and edge sets of the input graphs,
* removing duplicate vertices but maintaining duplicate edges.
*/
def union(graph: Graph[K, VV, EV]): Graph[K, VV, EV]
/**
* Performs difference on the vertex and edge sets of the input graphs,
* removing common vertices and edges. If a source/target vertex is removed,
* its corresponding edges will also be removed.
*/
def difference(graph: Graph[K, VV, EV]): Graph[K, VV, EV]
/**
* Performs intersect on the edge sets of the input graphs. Edges are considered equal, if they
* have the same source identifier, target identifier and edge value.
* The method computes pairs of equal edges from the input graphs. If the same edge occurs
* multiple times in the input graphs, there will be multiple edge pairs to be considered. Each
* edge instance can only be part of one pair. If the given parameter `distinctEdges` is set
* to `true`, there will be exactly one edge in the output graph representing all pairs of
* equal edges. If the parameter is set to `false`, both edges of each pair will be in the
* output.
* Vertices in the output graph will have no vertex values.
*/
def intersect(graph: Graph[K, VV, EV], distinctEdges: Boolean): Graph[K, NullValue, EV]

Basic graph statistics and properties.
/**
* @return a long integer representing the number of vertices
*/
def numberOfVertices(): Long
/**
* @return a long integer representing the number of edges
*/
def numberOfEdges(): Long

Usage Examples:
// Adding elements
val newVertex = new Vertex(4L, "David")
val graphWithVertex = graph.addVertex(newVertex)
val newEdge = new Edge(3L, 4L, 0.7)
val graphWithEdge = graph.addEdge(new Vertex(3L, "Charlie"), newVertex, 0.7)
// Set operations
val graph1 = Graph.fromTupleDataSet(vertices1, edges1, env)
val graph2 = Graph.fromTupleDataSet(vertices2, edges2, env)
val unionGraph = graph1.union(graph2)
val intersectionGraph = graph1.intersect(graph2, distinctEdges = true)
// Basic metrics
val vertexCount = graph.numberOfVertices()
val edgeCount = graph.numberOfEdges()