From bad029e30eb7baa1476a96abcffdd0b61f6d3967 Mon Sep 17 00:00:00 2001 From: semyonsinchenko Date: Wed, 6 Aug 2025 12:31:55 +0200 Subject: [PATCH] Replace UDF by built-in function in skewedJoin --- core/src/main/scala/org/graphframes/GraphFrame.scala | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/core/src/main/scala/org/graphframes/GraphFrame.scala b/core/src/main/scala/org/graphframes/GraphFrame.scala index 9b7907488..be523af84 100644 --- a/core/src/main/scala/org/graphframes/GraphFrame.scala +++ b/core/src/main/scala/org/graphframes/GraphFrame.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.functions.expr import org.apache.spark.sql.functions.lit import org.apache.spark.sql.functions.monotonically_increasing_id import org.apache.spark.sql.functions.struct -import org.apache.spark.sql.functions.udf import org.apache.spark.sql.types._ import org.apache.spark.storage.StorageLevel import org.graphframes.lib._ @@ -705,9 +704,7 @@ object GraphFrame extends Serializable with Logging { a.join(b, joinCol) } else { logDebug(s"$logPrefix Skewed join with ${hubs.size} high-degree keys.") - val isHub = udf { id: T => - hubs.contains(id) - } + val isHub = (c: Column) => c.isInCollection(hubs) val hashJoined = a .filter(!isHub(col(joinCol))) .join(b.filter(!isHub(col(joinCol))), joinCol)