graphframes · rjurney · Mar 10, 2025 · Feb 16, 2025 · Feb 16, 2025 · Feb 16, 2025
diff --git a/docs/img/Directed-Graphlet-G30.png b/docs/img/Directed-Graphlet-G30.png
diff --git a/docs/motif-tutorial.md b/docs/motif-tutorial.md
@@ -92,7 +92,6 @@ from pyspark import SparkContext
 from pyspark.sql import DataFrame, SparkSession
 
 # Initialize a SparkSession
-
 spark: SparkSession = (
     SparkSession.builder.appName("Stack Overflow Motif Analysis")
     # Lets the Id:(Stack Overflow int) and id:(GraphFrames ULID) coexist
@@ -103,7 +102,6 @@ sc: SparkContext = spark.sparkContext
 sc.setCheckpointDir("/tmp/graphframes-checkpoints")
 
 # Change me if you download a different stackexchange site
-
 STACKEXCHANGE_SITE = "stats.meta.stackexchange.com"
 BASE_PATH = f"python/graphframes/tutorials/data/{STACKEXCHANGE_SITE}"
 {% endhighlight %}
@@ -118,21 +116,17 @@ Load the nodes and edges of the graph from the `data` folder and count the types
 #
 
 # We created these in stackexchange.py from Stack Exchange data dump XML files
-
 NODES_PATH: str = f"{BASE_PATH}/Nodes.parquet"
 nodes_df: DataFrame = spark.read.parquet(NODES_PATH)
 
 # Repartition the nodes to give our motif searches parallelism
-
 nodes_df = nodes_df.repartition(50).checkpoint().cache()
 
 # We created these in stackexchange.py from Stack Exchange data dump XML files
-
 EDGES_PATH: str = f"{BASE_PATH}/Edges.parquet"
 edges_df: DataFrame = spark.read.parquet(EDGES_PATH)
 
 # Repartition the edges to give our motif searches parallelism
-
 edges_df = edges_df.repartition(50).checkpoint().cache()
 {% endhighlight %}
 </div>
@@ -243,7 +237,6 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     return df
 
 # Now apply this function to each of your DataFrames to get a consistent schema
-
 posts_df = add_missing_columns(posts_df, all_cols).select(all_column_names)
 post_links_df = add_missing_columns(post_links_df, all_cols).select(all_column_names)
 users_df = add_missing_columns(users_df, all_cols).select(all_column_names)
@@ -322,7 +315,6 @@ valid_edge_count = (
 )
 
 # Just up and die if we have edges that point to non-existent nodes
-
 assert (
     edge_count == valid_edge_count
 ), f"Edge count {edge_count} != valid edge count {valid_edge_count}"
@@ -359,7 +351,6 @@ A complete description of the graph query language is in the [GraphFrames User G
 paths = g.find("(a)-[e1]->(b); (b)-[e2]->(c); (c)-[e3]->(a)")
 
 # Show the first path
-
 paths.show(3)
 {% endhighlight %}
 </div>

diff --git a/docs/quick-start.md b/docs/quick-start.md
@@ -66,6 +66,7 @@ val v = spark.createDataFrame(List(
   ("b", "Bob", 36),
   ("c", "Charlie", 30)
 )).toDF("id", "name", "age")
+
 // Create an Edge DataFrame with "src" and "dst" columns
 val e = spark.createDataFrame(List(
   ("a", "b", "friend"),
@@ -96,6 +97,7 @@ v = spark.createDataFrame([
   ("b", "Bob", 36),
   ("c", "Charlie", 30),
 ], ["id", "name", "age"])
+
 # Create an Edge DataFrame with "src" and "dst" columns
 e = spark.createDataFrame([
   ("a", "b", "friend"),

diff --git a/docs/user-guide.md b/docs/user-guide.md
@@ -45,6 +45,7 @@ val v = spark.createDataFrame(List(
   ("f", "Fanny", 36),
   ("g", "Gabby", 60)
 )).toDF("id", "name", "age")
+
 // Edge DataFrame
 val e = spark.createDataFrame(List(
   ("a", "b", "friend"),
@@ -80,6 +81,7 @@ v = spark.createDataFrame([
   ("f", "Fanny", 36),
   ("g", "Gabby", 60)
 ], ["id", "name", "age"])
+
 # Edge DataFrame
 e = spark.createDataFrame([
   ("a", "b", "friend"),
@@ -172,8 +174,7 @@ from graphframes.examples import Graphs
 
 g = Graphs(spark).friends()  # Get example graph
 
-# Display the vertex and edge DataFrames
-
+# Display the vertex DataFrame
 g.vertices.show()
 
 # +--+-------+---+
@@ -188,6 +189,7 @@ g.vertices.show()
 # | g|  Gabby| 60|
 # +--+-------+---+
 
+# Display the edge DataFrame
 g.edges.show()
 
 # +---+---+------------+
@@ -368,6 +370,7 @@ from pyspark.sql.functions import col, lit, when
 from pyspark.sql.types import IntegerType
 from graphframes.examples import Graphs
 
+
 g = Graphs(spark).friends()  # Get example graph
 
 chain4 = g.find("(a)-[ab]->(b); (b)-[bc]->(c); (c)-[cd]->(d)")
@@ -476,7 +479,6 @@ paths = g.find("(a)-[e]->(b)")\
   .filter("a.age < b.age")
 
 # "paths" contains vertex info. Extract the edges
-
 e2 = paths.select("e.src", "e.dst", "e.relationship")
 
 # In Spark 1.5+, the user may simplify this call
@@ -539,6 +541,7 @@ For API details, refer to the [API docs](api/python/graphframes.html#graphframes
 {% highlight python %}
 from graphframes.examples import Graphs
 
+
 g = Graphs(spark).friends()  # Get example graph
 
 # Search from "Esther" for users of age < 32
@@ -630,6 +633,7 @@ For API details, refer to the [API docs](api/python/graphframes.html#graphframes
 {% highlight python %}
 from graphframes.examples import Graphs
 
+
 sc.setCheckpointDir("/tmp/spark-checkpoints")
 
 g = Graphs(spark).friends()  # Get example graph
@@ -678,6 +682,7 @@ For API details, refer to the [API docs](api/python/graphframes.html#graphframes
 {% highlight python %}
 from graphframes.examples import Graphs
 
+
 g = Graphs(spark).friends()  # Get example graph
 
 result = g.labelPropagation(maxIter=5)
@@ -741,6 +746,7 @@ For API details, refer to the [API docs](api/python/graphframes.html#graphframes
 {% highlight python %}
 from graphframes.examples import Graphs
 
+
 g = Graphs(spark).friends()  # Get example graph
 
 # Run PageRank until convergence to tolerance "tol"
@@ -796,6 +802,7 @@ For API details, refer to the [API docs](api/python/graphframes.html#graphframes
 {% highlight python %}
 from graphframes.examples import Graphs
 
+
 g = Graphs(spark).friends()  # Get example graph
 
 results = g.shortestPaths(landmarks=["a", "d"])
@@ -832,6 +839,7 @@ For API details, refer to the [API docs](api/python/graphframes.html#graphframes
 {% highlight python %}
 from graphframes.examples import Graphs
 
+
 g = Graphs(spark).friends()  # Get example graph
 
 results = g.triangleCount()
@@ -875,6 +883,7 @@ val sameG = GraphFrame(sameV, sameE)
 {% highlight python %}
 from graphframes.examples import Graphs
 
+
 g = Graphs(spark).friends()  # Get example graph
 
 # Save vertices and edges as Parquet to some location
@@ -946,6 +955,7 @@ from graphframes.lib import AggregateMessages as AM
 from graphframes.examples import Graphs
 from pyspark.sql.functions import sum as sqlsum
 
+
 g = Graphs(spark).friends()  # Get example graph
 
 # For each user, sum the ages of the adjacent users