Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion NOTICE
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
This project uses the LDBC datasets, which are licensed under the Apache Software License, Version 2.0.
The LDBC datasets are used for testing and evaluation purposes only.
The LDBC datasets are used for testing and evaluation purposes only. The LDBC project has the following NOTICE:
Note that the LDBC benchmark results should not be referred to using the words 'LDBC benchmark' or any equivalent phrase,
as per the LDBC fair use policy.

This project contains the code of Apache Spark GraphX that has the following NOTICE:
Copyright 2014-2025 The Apache Software Foundation.

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
42 changes: 37 additions & 5 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,11 @@ publishArtifact := false

lazy val commonSetting = Seq(
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-graphx" % sparkVer % "provided" cross CrossVersion.for3Use2_13,
"org.apache.spark" %% "spark-sql" % sparkVer % "provided" cross CrossVersion.for3Use2_13,
"org.apache.spark" %% "spark-mllib" % sparkVer % "provided" cross CrossVersion.for3Use2_13,
"org.slf4j" % "slf4j-api" % "2.0.17" % "provided",
"org.scalatest" %% "scalatest" % defaultScalaTestVer % Test,
"com.github.zafarkhaja" % "java-semver" % "0.10.2" % Test),
Compile / scalacOptions ++= Seq("-deprecation", "-feature"),
Compile / doc / scalacOptions ++= Seq(
"-groups",
"-implicits",
Expand Down Expand Up @@ -114,9 +112,43 @@ lazy val commonSetting = Seq(
ScalacOptions.source3,
ScalacOptions.fatalWarnings),
tpolecatExcludeOptions ++= Set(ScalacOptions.warnNonUnitStatement),
Test / tpolecatExcludeOptions ++= Set(ScalacOptions.warnValueDiscard))
Test / tpolecatExcludeOptions ++= Set(
ScalacOptions.warnValueDiscard,
ScalacOptions.warnUnusedLocals,
ScalacOptions.warnUnusedExplicits,
ScalacOptions.warnUnusedImplicits,
ScalacOptions.warnUnusedParams,
ScalacOptions.warnUnusedPrivates,
ScalacOptions.warnNumericWiden,
ScalacOptions.privateWarnNumericWiden,
))

// sbt sub-project rooted at the "graphx" directory; the `core` project depends on it.
lazy val graphx = (project in file("graphx"))
.settings(
commonSetting,
name := "graphframes-graphx",
// The published module name is the project name suffixed with the Spark major version.
moduleName := s"${name.value}-spark$sparkMajorVer",
// Export the JAR so that this can be excluded from shading in connect
exportJars := true,

// On Scala 2.13 the "unused" class tags have to be marked with @nowarn,
// while on Scala 2.12 they must not be.
// For now the only way to support both is to not check for unused @nowarn in GraphX.
tpolecatExcludeOptions ++= Set(ScalacOptions.warnUnusedNoWarn, ScalacOptions.privateWarnUnusedNoWarn),

// Build-wide (Global scope) restriction: Tags.limitAll(1) caps concurrently running tasks at one.
Global / concurrentRestrictions := Seq(Tags.limitAll(1)),
// Project-level documentation and coverage settings.
autoAPIMappings := true,
coverageHighlighting := false,
// Publishing: test artifacts are not published; the main (Compile) binary, doc and source
// artifacts are.
Test / packageBin / publishArtifact := false,
Test / packageDoc / publishArtifact := false,
Test / packageSrc / publishArtifact := false,
Compile / packageBin / publishArtifact := true,
Compile / packageDoc / publishArtifact := true,
Compile / packageSrc / publishArtifact := true)

lazy val core = (project in file("core"))
.dependsOn(graphx)
.settings(
commonSetting,
name := "graphframes",
Expand Down Expand Up @@ -217,8 +249,8 @@ lazy val docs = (project in file("docs"))
.withConfigValue(LaikaKeys.siteBaseURL, siteBaseUri)
.withConfigValue("pydoc.baseUri", s"$siteBaseUri/api/python")
.withConfigValue("scaladoc.baseUri", s"$siteBaseUri/api/scaladoc")
.withConfigValue("spark.version", sparkVer)
.withConfigValue("scala.version", scalaVer),
.withConfigValue("spark.version", sparkVer)
.withConfigValue("scala.version", scalaVer),
laikaExtensions := Seq(GitHubFlavor, SyntaxHighlighting, LaikaCustomDirectives),
laikaHTML := (laikaHTML dependsOn mdoc.toTask(
"") dependsOn generateAtomFeed dependsOn buildAndCopyScalaDoc dependsOn buildAndCopyPythonDoc dependsOn (core / Compile / doc)).value,
Expand Down
4 changes: 2 additions & 2 deletions core/src/main/scala/org/graphframes/GraphFrame.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

package org.graphframes

import org.apache.spark.graphx.Edge
import org.apache.spark.graphx.Graph
import org.apache.spark.graphframes.graphx.Edge
import org.apache.spark.graphframes.graphx.Graph
import org.apache.spark.ml.clustering.PowerIterationClustering
import org.apache.spark.sql._
import org.apache.spark.sql.functions.array
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@

package org.graphframes.examples

import org.apache.spark.graphx.Graph
import org.apache.spark.graphx.VertexRDD
import org.apache.spark.graphx.{Edge => GXEdge}
import org.apache.spark.graphframes.graphx.Graph
import org.apache.spark.graphframes.graphx.VertexRDD
import org.apache.spark.graphframes.graphx.{Edge => GXEdge}
import org.apache.spark.sql.Column
import org.apache.spark.sql.Row
import org.apache.spark.sql.SparkSession
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.graphframes.lib

import org.apache.hadoop.fs.Path
import org.apache.spark.graphframes.graphx
import org.apache.spark.sql.Column
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
Expand Down Expand Up @@ -183,7 +184,7 @@ object ConnectedComponents extends Logging {

private def runGraphX(graph: GraphFrame, maxIter: Int): DataFrame = {
val components =
org.apache.spark.graphx.lib.ConnectedComponents.run(graph.cachedTopologyGraphX, maxIter)
graphx.lib.ConnectedComponents.run(graph.cachedTopologyGraphX, maxIter)
GraphXConversions.fromGraphX(graph, components, vertexNames = Seq(COMPONENT)).vertices
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.graphframes.lib

import org.apache.spark.graphx.Graph
import org.apache.spark.graphframes.graphx.Graph
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.Row
import org.apache.spark.sql.functions._
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.graphframes.lib

import org.apache.spark.graphx.{lib => graphxlib}
import org.apache.spark.graphframes.graphx.{lib => graphxlib}
import org.apache.spark.sql.Column
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
Expand Down
2 changes: 1 addition & 1 deletion core/src/main/scala/org/graphframes/lib/PageRank.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.graphframes.lib

import org.apache.spark.graphx.{lib => graphxlib}
import org.apache.spark.graphframes.graphx.{lib => graphxlib}
import org.graphframes.GraphFrame

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.graphframes.lib

import org.apache.spark.graphx.{lib => graphxlib}
import org.apache.spark.graphframes.graphx.{lib => graphxlib}
import org.graphframes.GraphFrame
import org.graphframes.WithMaxIter

Expand Down
4 changes: 2 additions & 2 deletions core/src/main/scala/org/graphframes/lib/SVDPlusPlus.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

package org.graphframes.lib

import org.apache.spark.graphx.Edge
import org.apache.spark.graphx.{lib => graphxlib}
import org.apache.spark.graphframes.graphx.Edge
import org.apache.spark.graphframes.graphx.{lib => graphxlib}
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.Row
import org.graphframes.GraphFrame
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.graphframes.lib

import org.apache.spark.graphx.{lib => graphxlib}
import org.apache.spark.graphframes.graphx.{lib => graphxlib}
import org.apache.spark.sql.Column
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.col
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.graphframes.lib

import org.apache.spark.graphx.{lib => graphxlib}
import org.apache.spark.graphframes.graphx.{lib => graphxlib}
import org.apache.spark.sql.DataFrame
import org.graphframes.GraphFrame
import org.graphframes.WithMaxIter
Expand Down
4 changes: 2 additions & 2 deletions core/src/test/scala/org/graphframes/GraphFrameSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ package org.graphframes

import org.apache.commons.io.FileUtils
import org.apache.hadoop.fs.Path
import org.apache.spark.graphx.Edge
import org.apache.spark.graphx.Graph
import org.apache.spark.graphframes.graphx.Edge
import org.apache.spark.graphframes.graphx.Graph
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.Row
Expand Down
1 change: 0 additions & 1 deletion core/src/test/scala/org/graphframes/SparkFunSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,4 @@ private[graphframes] abstract class SparkFunSuite extends AnyFunSuite with Loggi
logInfo(s"\n\n===== FINISHED $shortSuiteName: '$testName' =====\n")
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.graphframes.graphx;

import java.io.Serializable;

/**
* Represents a subset of the fields of an [[EdgeTriplet]] or [[EdgeContext]]. This allows the
* system to populate only those fields for efficiency.
*/
public class TripletFields implements Serializable {

  // ---- Predefined selections ------------------------------------------------------------

  /** Selection that includes no field at all. */
  public static final TripletFields None = new TripletFields(false, false, false);

  /** Selection that includes the edge attribute only (no source, no destination). */
  public static final TripletFields EdgeOnly = new TripletFields(false, false, true);

  /** Selection that includes the source vertex and edge attributes, but not the destination. */
  public static final TripletFields Src = new TripletFields(true, false, true);

  /** Selection that includes the destination vertex and edge attributes, but not the source. */
  public static final TripletFields Dst = new TripletFields(false, true, true);

  /** Selection that includes everything: source, edge and destination. */
  public static final TripletFields All = new TripletFields(true, true, true);

  // ---- State ----------------------------------------------------------------------------

  /** {@code true} when the source vertex attribute is included. */
  public final boolean useSrc;

  /** {@code true} when the destination vertex attribute is included. */
  public final boolean useDst;

  /** {@code true} when the edge attribute is included. */
  public final boolean useEdge;

  // ---- Construction ---------------------------------------------------------------------

  /**
   * Builds a selection from three explicit flags.
   *
   * @param useSrc  whether the source vertex attribute is included
   * @param useDst  whether the destination vertex attribute is included
   * @param useEdge whether the edge attribute is included
   */
  public TripletFields(final boolean useSrc, final boolean useDst, final boolean useEdge) {
    this.useSrc = useSrc;
    this.useDst = useDst;
    this.useEdge = useEdge;
  }

  /** Builds the default selection, in which all three fields are included. */
  public TripletFields() {
    this(true, true, true);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.graphframes.graphx.impl;

/**
* Criteria for filtering edges based on activeness. For internal use only.
*/
public enum EdgeActiveness {

  /** No activeness requirement on either the source or the destination vertex. */
  Neither,

  /** Requires the source vertex to be active. */
  SrcOnly,

  /** Requires the destination vertex to be active. */
  DstOnly,

  /** Requires the source and the destination vertex to both be active. */
  Both,

  /** Requires one or both of the two vertices to be active. */
  Either;
}
Loading