From f19731edeeb6dd3b3a106b77d88c0b62194c3cc2 Mon Sep 17 00:00:00 2001 From: semyonsinchenko Date: Fri, 4 Jul 2025 06:44:05 +0200 Subject: [PATCH 1/2] Update README for the upcoming release --- README.md | 90 ++++++++++++++++++++++++++++++------------- python/pyproject.toml | 2 +- 2 files changed, 65 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 5c8c67f55..cd09ef970 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ [![Python CI](https://github.com/graphframes/graphframes/actions/workflows/python-ci.yml/badge.svg)](https://github.com/graphframes/graphframes/actions/workflows/python-ci.yml) [![pages-build-deployment](https://github.com/graphframes/graphframes/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/graphframes/graphframes/actions/workflows/pages/pages-build-deployment) [![scala-central-publish](https://github.com/graphframes/graphframes/actions/workflows/scala-publish.yml/badge.svg)](https://github.com/graphframes/graphframes/actions/workflows/scala-publish.yml) +[![python-pypi-publish](https://github.com/graphframes/graphframes/actions/workflows/python-publish.yml/badge.svg)](https://github.com/graphframes/graphframes/actions/workflows/python-publish.yml) # GraphFrames: DataFrame-based Graphs @@ -13,23 +14,58 @@ You can find user guide and API docs at https://graphframes.io ## GraphFrames is Back! -This projects was in maintenance mode for some time, but we are happy to announce that it is now back in active development! We are working on a new release with many bug fixes and improvements. We are also working on a new website and documentation. +This project was in maintenance mode for some time, but we are happy to announce that it is now back in active development! ## Installation and Quick-Start -The easiest way to start using GraphFrames is through the [Spark Packages system](https://spark-packages.org/package/graphframes/graphframes). 
Just run the following command: +### GraphFrames core + +GraphFrames scala core and Spark-Connect plugin are published in the Sonatype Central. ```bash # Interactive Scala/Java -$ spark-shell --packages graphframes:graphframes:0.8.4-spark3.5-s_2.12 -# Interactive Python -$ pyspark --packages graphframes:graphframes:0.8.4-spark3.5-s_2.12 +# For Spark 3.5.x, scala 2.12 +$ spark-shell --packages io.graphframes:graphframes-spark3_2.12:0.9.0 + +# For Spark 3.5.x, scala 2.13 +$ spark-shell --packages io.graphframes:graphframes-spark3_2.13:0.9.0 + +# For Spark 4.0.x +$ spark-shell --packages io.graphframes:graphframes-spark4_2.13:0.9.0 + +# Interactive Python, Spark 3.5.x +$ pyspark --packages io.graphframes:graphframes-spark3_2.12:0.9.0 + +# Interactive Python, Spark 4.0.x +$ pyspark --packages io.graphframes:graphframes-spark4_2.13:0.9.0 +``` + +### GraphFrames Python API + +The Python API is published on PyPI: + +```bash +pip install graphframes-py +``` + +**NOTE!** *Python distribution does not include JVM-core. You need to add it to your cluster or Spark-Connect server!* + +### GraphFrames Spark Connect + +To add GraphFrames to your spark connect server, you need to specify the plugin name, for example: -# Submit a script in Scala/Java/Python -$ spark-submit --packages graphframes:graphframes:0.8.4-spark3.5-s_2.12 script.py +```bash +./sbin/start-connect-server.sh \ + --conf spark.connect.extensions.relation.classes=org.apache.spark.sql.graphframes.GraphFramesConnect \ + --packages io.graphframes:graphframes-connect-spark4_2.13:0.9.0 \ + --conf spark.checkpoint.dir=${CHECKPOINT_DIR} ``` +**NOTE!** *GraphFrames relies on iterative graph algorithms and uses checkpoints internally to keep Spark's logical plan from growing without bound. The Spark Connect API provides no way to set the checkpoint directory, so it must be set via the `spark.checkpoint.dir` configuration!* + +### Quick Start + Now you can create a GraphFrame as follows. 
In Python: @@ -145,7 +181,9 @@ To learn more about GraphFrames, check out these resources: * [GraphFrames Google Group](https://groups.google.com/forum/#!forum/graphframes) * [#graphframes Discord Channel on GraphGeeks](https://discord.com/channels/1162999022819225631/1326257052368113674) -## `graphframes-py` is our Official PyPi Package +## Note about Python API distribution + +`graphframes-py` is our Official PyPi Package We recommend using the Spark Packages system to install the latest version of GraphFrames, but now publish a build of our Python package to PyPi in the [graphframes-py](https://pypi.org/project/graphframes-py/) package. It can be used to provide type hints in IDEs, but does not load the java-side of GraphFrames so will not work without loading the GraphFrames package. See [Installation and Quick-Start](#installation-and-quick-start). @@ -153,28 +191,30 @@ We recommend using the Spark Packages system to install the latest version of Gr pip install graphframes-py ``` -This project does not own or control the [graphframes PyPI package](https://pypi.org/project/graphframes/) (installs 0.6.0) or [graphframes-latest PyPI package](https://pypi.org/project/graphframes-latest/) (installs 0.8.4). - -## GraphFrames and sbt - -If you use the sbt-spark-package plugin, in your sbt build file, add the following, pulled from [GraphFrames on Spark Packages](https://spark-packages.org/package/graphframes/graphframes): +**WARNING!** -``` -spDependencies += "graphframes/graphframes:0.8.4-spark3.5-s_2.12" -``` +This project does not own or control the [graphframes PyPI package](https://pypi.org/project/graphframes/) (installs 0.6.0) or [graphframes-latest PyPI package](https://pypi.org/project/graphframes-latest/) (installs 0.8.4). 
-Otherwise, +**WARNING!** +## Maven and SBT + +Maven: +```xml +<dependencies> + <dependency> + <groupId>io.graphframes</groupId> + <artifactId>graphframes-spark4_2.13</artifactId> + <version>0.9.0</version> + </dependency> +</dependencies> ``` -resolvers += "Spark Packages Repo" at "https://repos.spark-packages.org/" -libraryDependencies += "graphframes" % "graphframes" % "0.8.4-spark3.5-s_2.12" +SBT: +```sbt +libraryDependencies += "io.graphframes" %% "graphframes-spark4" % "0.9.0" ``` -## GraphFrames and Maven - -Please see the section about nightly builds! - **WARNING!** **=========================** @@ -195,9 +235,7 @@ To compile the Spark Connect Plugin, run `build/sbt connect/package` ## Spark version compatibility -This project is compatible with Spark 3.4+. Significant speed improvements have been made to DataFrames in recent versions of Spark, so you may see speedups from using the latest Spark version. - -Nightly builds of GraphFrames: +This project is compatible with Spark 3.5.x and Spark 4.0.x. Significant speed improvements have been made to DataFrames in recent versions of Spark, so you may see speedups from using the latest Spark version. 
| Component | Spark 3.x (Scala 2.12) | Spark 3.x (Scala 2.13) | Spark 4.x (Scala 2.13) | |---------------------|------------------------|------------------------|------------------------| diff --git a/python/pyproject.toml b/python/pyproject.toml index e95f7fb82..17c3c73d0 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "graphframes-py" -version = "0.8.4" +version = "0.9.0" description = "GraphFrames: Graph Processing Framework for Apache Spark" authors = ["GraphFrames Contributors "] license = "Apache 2.0" From c0abec6f55aad478ec6857e6be5cba7595767a3d Mon Sep 17 00:00:00 2001 From: semyonsinchenko Date: Sat, 12 Jul 2025 07:22:15 +0200 Subject: [PATCH 2/2] Fix from comment and drop legacy VERSION file --- README.md | 2 +- python/VERSION | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 python/VERSION diff --git a/README.md b/README.md index cd09ef970..38fef609c 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ This project was in maintenance mode for some time, but we are happy to announce ### GraphFrames core -GraphFrames scala core and Spark-Connect plugin are published in the Sonatype Central. +GraphFrames scala core and Spark-Connect plugin are published in the Sonatype Central. Namespace is `io.graphframes`. ```bash # Interactive Scala/Java diff --git a/python/VERSION b/python/VERSION deleted file mode 100644 index b60d71966..000000000 --- a/python/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.8.4