Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 9 additions & 27 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ lazy val root = (project in file("."))
commonSetting,
name := "graphframes",
moduleName := s"${name.value}-spark$sparkMajorVer",
// Export the JAR so that this can be excluded from shading in connect
exportJars := true,

// Global settings
Global / concurrentRestrictions := Seq(Tags.limitAll(1)),
Expand All @@ -116,16 +118,6 @@ lazy val root = (project in file("."))

Compile / unmanagedSourceDirectories += (Compile / baseDirectory).value / "src" / "main" / s"scala-spark-$sparkMajorVer",

// Assembly settings
assembly / test := {}, // No tests in assembly
assemblyPackageScala / assembleArtifact := false,
assembly / assemblyMergeStrategy := {
case PathList("META-INF", xs @ _*) => MergeStrategy.discard
case x if x.endsWith("module-info.class") => MergeStrategy.discard
case x =>
val oldStrategy = (assembly / assemblyMergeStrategy).value
oldStrategy(x)
},
Comment on lines -119 to -128
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed this because I don't think there's any need to run assembly on the root project, unless you want to keep the ability to manually build a fat JAR?

Test / packageBin / publishArtifact := false,
Test / packageDoc / publishArtifact := false,
Test / packageSrc / publishArtifact := false,
Expand All @@ -136,39 +128,29 @@ lazy val root = (project in file("."))
lazy val connect = (project in file("graphframes-connect"))
.dependsOn(root)
.settings(
commonSetting,
name := s"graphframes-connect",
moduleName := s"${name.value}-spark${sparkMajorVer}",
commonSetting,
Compile / unmanagedSourceDirectories += (Compile / baseDirectory).value / "src" / "main" / s"scala-spark-$sparkMajorVer",
Compile / PB.targets := Seq(PB.gens.java -> (Compile / sourceManaged).value),
Compile / PB.includePaths ++= Seq(file("src/main/protobuf")),
PB.protocVersion := protocVersion,
PB.additionalDependencies := Nil,
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-connect" % sparkVer % "provided" cross CrossVersion.for3Use2_13),

// Assembly and shading
assembly / assemblyJarName := s"${moduleName.value}_${(scalaBinaryVersion).value}-${version.value}.jar",
assembly / test := {},
assemblyPackageScala / assembleArtifact := false,
assembly / assemblyShadeRules := Seq(
ShadeRule.rename("com.google.protobuf.**" -> protobufShadingPattern).inAll),
assembly / assemblyMergeStrategy := {
case PathList("google", "protobuf", xs @ _*) => MergeStrategy.discard
case PathList("META-INF", xs @ _*) => MergeStrategy.discard
case x if x.endsWith("module-info.class") => MergeStrategy.discard
case x => MergeStrategy.first
},
assembly / assemblyExcludedJars := (Compile / fullClasspath).value.filter { className =>
className.data
.getName()
.contains("scala-library-") || className.data
.getName()
.contains("slf4j-api-")
},
publish / skip := false,
// Don't actually shade anything, we just need to rename the protobuf packages to what's bundled with Spark
assembly / assemblyExcludedJars := (assembly / fullClasspath).value,
Compile / packageBin := assembly.value,
Test / packageBin / publishArtifact := false,
Test / packageDoc / publishArtifact := false,
Test / packageSrc / publishArtifact := false,
Compile / packageBin / publishArtifact := true,
Compile / packageDoc / publishArtifact := false,
Compile / packageSrc / publishArtifact := false)
Compile / packageSrc / publishArtifact := false
)
4 changes: 2 additions & 2 deletions python/dev/build_jar.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ def build(spark_versions: Sequence[str] = ["3.5.5"]):
sbt_executable,
f"-Dspark.version={spark_version}",
"clean",
"assembly",
"package",
"connect/clean",
"connect/assembly"
"connect/package"
]
sbt_build = subprocess.Popen(
sbt_build_command,
Expand Down
4 changes: 2 additions & 2 deletions python/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def get_gf_jar_locations() -> Tuple[str, str]:
core_jar: Optional[str] = None
connect_jar: Optional[str] = None

for pp in core_dir.glob("graphframes-assembly-*.jar"):
for pp in core_dir.glob(f"graphframes-spark{spark_major_version}*.jar"):
assert isinstance(pp, pathlib.PosixPath) # type checking
core_jar = str(pp.absolute())

Expand All @@ -45,7 +45,7 @@ def get_gf_jar_locations() -> Tuple[str, str]:
f"Failed to find graphframes jar for Spark {spark_major_version} in {core_dir}"
)

for pp in connect_dir.glob("graphframes-connect-assembly-*.jar"):
for pp in connect_dir.glob(f"graphframes-connect-spark{spark_major_version}*.jar"):
assert isinstance(pp, pathlib.PosixPath) # type checking
connect_jar = str(pp.absolute())

Expand Down
3 changes: 2 additions & 1 deletion src/test/scala/org/graphframes/ldbc/TestLDBCCases.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ import org.graphframes.GraphFrameTestSparkContext
import org.graphframes.SparkFunSuite
import org.graphframes.examples.LDBCUtils

import java.io.File
import java.nio.file._
import java.util.Properties

class TestLDBCCases extends SparkFunSuite with GraphFrameTestSparkContext {
private val resourcesPath = Paths.get(getClass().getResource("/").toURI())
private val resourcesPath = Path.of(new File("target").toURI())
private val unreachableID = 9223372036854775807L

private def readUndirectedUnweighted(pathPrefix: String): GraphFrame = {
Expand Down