diff --git a/build.sbt b/build.sbt index 9787a7cf9..925365dfa 100644 --- a/build.sbt +++ b/build.sbt @@ -108,6 +108,8 @@ lazy val root = (project in file(".")) commonSetting, name := "graphframes", moduleName := s"${name.value}-spark$sparkMajorVer", + // Export the JAR so that this can be excluded from shading in connect + exportJars := true, // Global settings Global / concurrentRestrictions := Seq(Tags.limitAll(1)), @@ -116,16 +118,6 @@ lazy val root = (project in file(".")) Compile / unmanagedSourceDirectories += (Compile / baseDirectory).value / "src" / "main" / s"scala-spark-$sparkMajorVer", - // Assembly settings - assembly / test := {}, // No tests in assembly - assemblyPackageScala / assembleArtifact := false, - assembly / assemblyMergeStrategy := { - case PathList("META-INF", xs @ _*) => MergeStrategy.discard - case x if x.endsWith("module-info.class") => MergeStrategy.discard - case x => - val oldStrategy = (assembly / assemblyMergeStrategy).value - oldStrategy(x) - }, Test / packageBin / publishArtifact := false, Test / packageDoc / publishArtifact := false, Test / packageSrc / publishArtifact := false, @@ -136,39 +128,29 @@ lazy val root = (project in file(".")) lazy val connect = (project in file("graphframes-connect")) .dependsOn(root) .settings( - commonSetting, name := s"graphframes-connect", moduleName := s"${name.value}-spark${sparkMajorVer}", + commonSetting, Compile / unmanagedSourceDirectories += (Compile / baseDirectory).value / "src" / "main" / s"scala-spark-$sparkMajorVer", Compile / PB.targets := Seq(PB.gens.java -> (Compile / sourceManaged).value), Compile / PB.includePaths ++= Seq(file("src/main/protobuf")), PB.protocVersion := protocVersion, + PB.additionalDependencies := Nil, libraryDependencies ++= Seq( "org.apache.spark" %% "spark-connect" % sparkVer % "provided" cross CrossVersion.for3Use2_13), // Assembly and shading + assembly / assemblyJarName := s"${moduleName.value}_${(scalaBinaryVersion).value}-${version.value}.jar", assembly / 
test := {}, - assemblyPackageScala / assembleArtifact := false, assembly / assemblyShadeRules := Seq( ShadeRule.rename("com.google.protobuf.**" -> protobufShadingPattern).inAll), - assembly / assemblyMergeStrategy := { - case PathList("google", "protobuf", xs @ _*) => MergeStrategy.discard - case PathList("META-INF", xs @ _*) => MergeStrategy.discard - case x if x.endsWith("module-info.class") => MergeStrategy.discard - case x => MergeStrategy.first - }, - assembly / assemblyExcludedJars := (Compile / fullClasspath).value.filter { className => - className.data - .getName() - .contains("scala-library-") || className.data - .getName() - .contains("slf4j-api-") - }, - publish / skip := false, + // Exclude every dependency JAR from the assembly: nothing is bundled, the shade rule above only renames protobuf references to the package names bundled with Spark + assembly / assemblyExcludedJars := (assembly / fullClasspath).value, Compile / packageBin := assembly.value, Test / packageBin / publishArtifact := false, Test / packageDoc / publishArtifact := false, Test / packageSrc / publishArtifact := false, Compile / packageBin / publishArtifact := true, Compile / packageDoc / publishArtifact := false, - Compile / packageSrc / publishArtifact := false) + Compile / packageSrc / publishArtifact := false + ) diff --git a/python/dev/build_jar.py b/python/dev/build_jar.py index d9920a7ca..3bcce30ca 100644 --- a/python/dev/build_jar.py +++ b/python/dev/build_jar.py @@ -16,9 +16,9 @@ def build(spark_versions: Sequence[str] = ["3.5.5"]): sbt_executable, f"-Dspark.version={spark_version}", "clean", - "assembly", + "package", "connect/clean", - "connect/assembly" + "connect/package" ] sbt_build = subprocess.Popen( sbt_build_command, diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 6265cc33e..aeb543cbe 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -36,7 +36,7 @@ def get_gf_jar_locations() -> Tuple[str, str]: core_jar: Optional[str] = None connect_jar: Optional[str] = None - for pp in 
core_dir.glob("graphframes-assembly-*.jar"): + for pp in core_dir.glob(f"graphframes-spark{spark_major_version}*.jar"): assert isinstance(pp, pathlib.PosixPath) # type checking core_jar = str(pp.absolute()) @@ -45,7 +45,7 @@ def get_gf_jar_locations() -> Tuple[str, str]: f"Failed to find graphframes jar for Spark {spark_major_version} in {core_dir}" ) - for pp in connect_dir.glob("graphframes-connect-assembly-*.jar"): + for pp in connect_dir.glob(f"graphframes-connect-spark{spark_major_version}*.jar"): assert isinstance(pp, pathlib.PosixPath) # type checking connect_jar = str(pp.absolute()) diff --git a/src/test/scala/org/graphframes/ldbc/TestLDBCCases.scala b/src/test/scala/org/graphframes/ldbc/TestLDBCCases.scala index a89f0a1a7..0e4b4df1a 100644 --- a/src/test/scala/org/graphframes/ldbc/TestLDBCCases.scala +++ b/src/test/scala/org/graphframes/ldbc/TestLDBCCases.scala @@ -11,11 +11,12 @@ import org.graphframes.GraphFrameTestSparkContext import org.graphframes.SparkFunSuite import org.graphframes.examples.LDBCUtils +import java.io.File import java.nio.file._ import java.util.Properties class TestLDBCCases extends SparkFunSuite with GraphFrameTestSparkContext { - private val resourcesPath = Paths.get(getClass().getResource("/").toURI()) + private val resourcesPath = Path.of(new File("target").toURI()) private val unreachableID = 9223372036854775807L private def readUndirectedUnweighted(pathPrefix: String): GraphFrame = {