From f4e9cdbb9c1d56195c04e3142e01b7b77198085d Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sat, 15 Feb 2025 20:58:48 -0800
Subject: [PATCH 01/53] Converted tests to pytest. Build a Python package.
 Update requirements.txt and split out requirements-dev.txt. Version bumps.

---
 .github/workflows/python-ci.yml |   7 +-
 .gitignore                      |  10 +
 Dockerfile                      |  10 +-
 VERSION                         |   0
 build.sbt                       |   2 +-
 docs/_config.yml                |   2 +-
 python/.gitignore               |   4 -
 python/MANIFEST.in              |   4 +
 python/graphframes/tests.py     | 405 ++++++++++++++++++--------------
 python/requirements-dev.txt     |   6 +
 python/requirements.txt         |   5 +-
 python/run-tests.sh             |  17 +-
 python/setup.cfg                |  44 +++-
 python/setup.py                 |  37 ++-
 version.sbt                     |   2 +-
 15 files changed, 342 insertions(+), 213 deletions(-)
 delete mode 100644 VERSION
 delete mode 100644 python/.gitignore
 create mode 100644 python/requirements-dev.txt

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index 8b84d6d82..36b6b97e7 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -7,8 +7,8 @@ jobs:
       matrix:
         include:
           - spark-version: 3.5.4
-            scala-version: 2.12.18
-            python-version: 3.9.19
+            scala-version: 2.12.20
+            python-version: 3.11.11
     runs-on: ubuntu-22.04
     env:
       # define Java options for both official sbt and sbt-extras
@@ -35,8 +35,11 @@ jobs:
       run: |
         python -m pip install --upgrade pip wheel
         pip install -r ./python/requirements.txt
+        pip install -r ./python/requirements-dev.txt
         pip install pyspark==${{ matrix.spark-version }}
     - name: Test
       run: |
+        python python/setup.py install
+        python python/setup.py bdist_wheel
         export SPARK_HOME=$(python -c "import os; from importlib.util import find_spec; print(os.path.join(os.path.dirname(find_spec('pyspark').origin)))")
         ./python/run-tests.sh
diff --git a/.gitignore b/.gitignore
index a07973c1e..dcbde8186 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,13 @@ project/plugins/project/
 
 # Mac
 *.DS_Store
+.vscode
+
+# Python specific
+python/build
+python/dist
+build/lib
+python/graphframes.egg-info
+python/graphframes/tutorials/data
+python/docs/_build
+python/docs/_site
diff --git a/Dockerfile b/Dockerfile
index 1c4430912..b9fe8c528 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,16 +1,16 @@
 FROM ubuntu:22.04
 
-ARG PYTHON_VERSION=3.8
+ARG PYTHON_VERSION=3.9
 ARG DEBIAN_FRONTEND=noninteractive
 
 RUN apt-get update && \
-    apt-get install -y wget bzip2 build-essential openjdk-8-jdk ssh sudo && \
+    apt-get install -y wget bzip2 build-essential openjdk-11-jdk ssh sudo && \
     apt-get clean
 
 # Install Spark and update env variables.
-ENV SCALA_VERSION 2.12.17
-ENV SPARK_VERSION "3.4.1"
-ENV SPARK_BUILD "spark-${SPARK_VERSION}-bin-hadoop3.2"
+ENV SCALA_VERSION 2.12.20
+ENV SPARK_VERSION "3.5.4"
+ENV SPARK_BUILD "spark-${SPARK_VERSION}-bin-hadoop3"
 ENV SPARK_BUILD_URL "https://dist.apache.org/repos/dist/release/spark/spark-${SPARK_VERSION}/${SPARK_BUILD}.tgz"
 RUN wget --quiet "$SPARK_BUILD_URL" -O /tmp/spark.tgz && \
     tar -C /opt -xf /tmp/spark.tgz && \
diff --git a/VERSION b/VERSION
deleted file mode 100644
index e69de29bb..000000000
diff --git a/build.sbt b/build.sbt
index 061901717..63168c57d 100644
--- a/build.sbt
+++ b/build.sbt
@@ -3,7 +3,7 @@ import ReleaseTransformations._
 lazy val sparkVer = sys.props.getOrElse("spark.version", "3.5.4")
 lazy val sparkBranch = sparkVer.substring(0, 3)
 lazy val defaultScalaVer = sparkBranch match {
-  case "3.5" => "2.12.18"
+  case "3.5" => "2.12.20"
   case _ => throw new IllegalArgumentException(s"Unsupported Spark version: $sparkVer.")
 }
 lazy val scalaVer = sys.props.getOrElse("scala.version", defaultScalaVer)
diff --git a/docs/_config.yml b/docs/_config.yml
index 4c1ab075c..379fc242f 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -13,7 +13,7 @@ include:
 
 # These allow the documentation to be updated with newer releases
 # of Spark, Scala, and Mesos.
-GRAPHFRAMES_VERSION: 0.8.4
+GRAPHFRAMES_VERSION: 0.8.5
 #SCALA_BINARY_VERSION: "2.10"
 #SCALA_VERSION: "2.10.4"
 #MESOS_VERSION: 0.21.0
diff --git a/python/.gitignore b/python/.gitignore
deleted file mode 100644
index 81410ca55..000000000
--- a/python/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-*.pyc
-docs/_build/
-build/
-dist/
diff --git a/python/MANIFEST.in b/python/MANIFEST.in
index 73eaf8ba2..4eb0ee5af 100644
--- a/python/MANIFEST.in
+++ b/python/MANIFEST.in
@@ -2,3 +2,7 @@
 # https://github.com/pypa/sampleproject/blob/master/MANIFEST.in
 # For more details about the MANIFEST file, you may read the docs at
 # https://docs.python.org/2/distutils/sourcedist.html#the-manifest-in-template
+recursive-include graphframes *.py
+recursive-exclude * __pycache__
+recursive-exclude * *.pyc
+include graphframes/tutorials/data/.exists
diff --git a/python/graphframes/tests.py b/python/graphframes/tests.py
index 9a7ad1371..259435759 100644
--- a/python/graphframes/tests.py
+++ b/python/graphframes/tests.py
@@ -15,63 +15,82 @@
 # limitations under the License.
 #
 
-import sys
+import os
 import tempfile
 import shutil
 import re
 
-if sys.version_info[:2] <= (2, 6):
-    try:
-        import unittest2 as unittest
-    except ImportError:
-        sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier')
-        sys.exit(1)
-else:
-    import unittest
-
-from pyspark import SparkContext
-from pyspark.sql import functions as sqlfunctions, SparkSession
+import pytest
+from pyspark import SparkConf, SparkContext
+from pyspark.sql import functions as F, SparkSession
 
 from .graphframe import GraphFrame, Pregel, _java_api, _from_java_gf
 from .lib import AggregateMessages as AM
 from .examples import Graphs, BeliefPropagation
 
+
+def _read_version(path="version.sbt"):
+    """Return the project version recorded in the sbt version file at ``path``."""
+    with open(path) as version_file:
+        content = version_file.read().strip()
+    # NOTE(review): version.sbt presumably holds an sbt setting such as
+    # `... := "x.y.z"`; use the quoted value, else fall back to the raw text.
+    match = re.search(r'"([^"]+)"', content)
+    return match.group(1) if match else content
+
+
+VERSION = _read_version()
+
+
+@pytest.fixture(scope="class", autouse=True)
+def set_spark(request, spark_session):
+    request.cls.spark = spark_session
+
+
+@pytest.mark.usefixtures("set_spark")
 class GraphFrameTestUtils(object):
 
     @classmethod
     def parse_spark_version(cls, version_str):
-        """ take an input version string
-            return version items in a dictionary
+        """Parse a "<major>.<minor>[.<maintenance>[-<special>]]" version string.
+        Return a dict with "major", "minor", "maintenance" (default 0) and "special" entries.
         """
-        _sc_ver_patt = r'(\d+)\.(\d+)(\.(\d+)(-(.+))?)?'
+        _sc_ver_patt = r"(\d+)\.(\d+)(\.(\d+)(-(.+))?)?"
         m = re.match(_sc_ver_patt, version_str)
         if not m:
-            raise TypeError("version {} shoud be in <major>.<minor>.<maintenance>".format(version_str))
+            raise TypeError(
+                "version {} should be in <major>.<minor>.<maintenance>".format(version_str)
+            )
         version_info = {}
         try:
-            version_info['major'] = int(m.group(1))
+            version_info["major"] = int(m.group(1))
         except:
-            raise TypeError("invalid minor version")
+            raise TypeError("invalid major version")
         try:
-            version_info['minor'] = int(m.group(2))
+            version_info["minor"] = int(m.group(2))
         except:
-            raise TypeError("invalid major version")
+            raise TypeError("invalid minor version")
         try:
-            version_info['maintenance'] = int(m.group(4))
+            version_info["maintenance"] = int(m.group(4))
         except:
-            version_info['maintenance'] = 0
+            version_info["maintenance"] = 0
         try:
-            version_info['special'] = m.group(6)
+            version_info["special"] = m.group(6)
         except:
             pass
         return version_info
 
     @classmethod
     def createSparkContext(cls):
-        cls.sc = sc = SparkContext('local[4]', "GraphFramesTests")
+        cls.conf = SparkConf().setAppName("GraphFramesTests")
+        cls.conf.set(
+            "spark.submit.pyFiles",
+            os.path.abspath(f"python/dist/graphframes-{VERSION}-py3-none-any.whl"),
+        )
+        cls.sc = SparkContext(master="local[4]", appName="GraphFramesTests", conf=cls.conf)
         cls.checkpointDir = tempfile.mkdtemp()
         cls.sc.setCheckpointDir(cls.checkpointDir)
-        cls.spark_version = cls.parse_spark_version(sc.version)
+        cls.spark_version = cls.parse_spark_version(cls.sc.version)
 
     @classmethod
     def stopSparkContext(cls):
@@ -81,10 +90,10 @@ def stopSparkContext(cls):
 
     @classmethod
     def spark_at_least_of_version(cls, version_str):
-        assert hasattr(cls, 'spark_version')
+        assert hasattr(cls, "spark_version")
         required_version = cls.parse_spark_version(version_str)
         spark_version = cls.spark_version
-        for _name in ['major', 'minor', 'maintenance']:
+        for _name in ["major", "minor", "maintenance"]:
             sc_ver = spark_version[_name]
             req_ver = required_version[_name]
             if sc_ver != req_ver:
@@ -92,28 +101,31 @@ def spark_at_least_of_version(cls, version_str):
         # All major.minor.maintenance equal
         return True
 
-def setUpModule():
-    GraphFrameTestUtils.createSparkContext()
 
-def tearDownModule():
+@pytest.fixture(scope="module", autouse=True)
+def spark_context():
+    GraphFrameTestUtils.createSparkContext()
+    yield
     GraphFrameTestUtils.stopSparkContext()
 
 
-class GraphFrameTestCase(unittest.TestCase):
+@pytest.fixture(scope="class")
+def spark_session():
+    # Create a SparkSession with a smaller number of shuffle partitions.
+    spark = (
+        SparkSession(GraphFrameTestUtils.sc)
+        .builder.config("spark.sql.shuffle.partitions", 4)
+        .getOrCreate()
+    )
+    yield spark
+    # No explicit stop; SparkContext shutdown will clean up.
 
-    @classmethod
-    def setUpClass(cls):
-        # Small tests run much faster with spark.sql.shuffle.partitions = 4
-        cls.spark = SparkSession(GraphFrameTestUtils.sc).builder.config('spark.sql.shuffle.partitions', 4).getOrCreate()
-
-    @classmethod
-    def tearDownClass(cls):
-        cls.spark = None
 
+@pytest.mark.usefixtures("set_spark")
+class TestGraphFrame:
 
-class GraphFrameTest(GraphFrameTestCase):
-    def setUp(self):
-        super(GraphFrameTest, self).setUp()
+    def setup_method(self, method):
+        # Mimic setUp: create a simple GraphFrame instance for each test.
         localVertices = [(1, "A"), (2, "B"), (3, "C")]
         localEdges = [(1, 2, "love"), (2, 1, "hate"), (2, 3, "follow")]
         v = self.spark.createDataFrame(localVertices, ["id", "name"])
@@ -123,28 +135,38 @@ def setUp(self):
     def test_spark_version_check(self):
         gtu = GraphFrameTestUtils
         gtu.spark_version = gtu.parse_spark_version("2.0.2")
-        self.assertTrue(gtu.spark_at_least_of_version("1.7"))
-        self.assertTrue(gtu.spark_at_least_of_version("2.0"))
-        self.assertTrue(gtu.spark_at_least_of_version("2.0.1"))
-        self.assertTrue(gtu.spark_at_least_of_version("2.0.2"))
-        self.assertFalse(gtu.spark_at_least_of_version("2.0.3"))
-        self.assertFalse(gtu.spark_at_least_of_version("2.1"))
+
+        assert gtu.spark_at_least_of_version("1.7")
+        assert gtu.spark_at_least_of_version("2.0")
+        assert gtu.spark_at_least_of_version("2.0.1")
+        assert gtu.spark_at_least_of_version("2.0.2")
+        assert not gtu.spark_at_least_of_version("2.0.3")
+        assert not gtu.spark_at_least_of_version("2.1")
 
     def test_construction(self):
         g = self.g
-        vertexIDs = map(lambda x: x[0], g.vertices.select("id").collect())
+        vertexIDs = [row[0] for row in g.vertices.select("id").collect()]
         assert sorted(vertexIDs) == [1, 2, 3]
-        edgeActions = map(lambda x: x[0], g.edges.select("action").collect())
+
+        edgeActions = [row[0] for row in g.edges.select("action").collect()]
         assert sorted(edgeActions) == ["follow", "hate", "love"]
-        tripletsFirst = list(map(lambda x: (x[0][1], x[1][1], x[2][2]),
-                            g.triplets.sort("src.id").select("src", "dst", "edge").take(1)))
+
+        tripletsFirst = list(
+            map(
+                lambda x: (x[0][1], x[1][1], x[2][2]),
+                g.triplets.sort("src.id").select("src", "dst", "edge").take(1),
+            )
+        )
         assert tripletsFirst == [("A", "B", "love")], tripletsFirst
+
         # Try with invalid vertices and edges DataFrames
         v_invalid = self.spark.createDataFrame(
-            [(1, "A"), (2, "B"), (3, "C")], ["invalid_colname_1", "invalid_colname_2"])
+            [(1, "A"), (2, "B"), (3, "C")], ["invalid_colname_1", "invalid_colname_2"]
+        )
         e_invalid = self.spark.createDataFrame(
-            [(1, 2), (2, 3), (3, 1)], ["invalid_colname_3", "invalid_colname_4"])
-        with self.assertRaises(ValueError):
+            [(1, 2), (2, 3), (3, 1)], ["invalid_colname_3", "invalid_colname_4"]
+        )
+        with pytest.raises(ValueError):
             GraphFrame(v_invalid, e_invalid)
 
     def test_cache(self):
@@ -155,17 +177,17 @@ def test_cache(self):
     def test_degrees(self):
         g = self.g
         outDeg = g.outDegrees
-        self.assertSetEqual(set(outDeg.columns), {"id", "outDegree"})
+        assert set(outDeg.columns) == {"id", "outDegree"}
         inDeg = g.inDegrees
-        self.assertSetEqual(set(inDeg.columns), {"id", "inDegree"})
+        assert set(inDeg.columns) == {"id", "inDegree"}
         deg = g.degrees
-        self.assertSetEqual(set(deg.columns), {"id", "degree"})
+        assert set(deg.columns) == {"id", "degree"}
 
     def test_motif_finding(self):
         g = self.g
         motifs = g.find("(a)-[e]->(b)")
         assert motifs.count() == 3
-        self.assertSetEqual(set(motifs.columns), {"a", "e", "b"})
+        assert set(motifs.columns) == {"a", "e", "b"}
 
     def test_filterVertices(self):
         g = self.g
@@ -178,8 +200,8 @@ def test_filterVertices(self):
             e2 = g2.edges.select("src", "dst", "action").collect()
             assert len(v2) == len(expected_v)
             assert len(e2) == len(expected_e)
-            self.assertSetEqual(set(v2), set(expected_v))
-            self.assertSetEqual(set(e2), set(expected_e))
+            assert set(v2) == set(expected_v)
+            assert set(e2) == set(expected_e)
 
     def test_filterEdges(self):
         g = self.g
@@ -192,8 +214,8 @@ def test_filterEdges(self):
             e2 = g2.edges.select("src", "dst", "action").collect()
             assert len(v2) == len(expected_v)
             assert len(e2) == len(expected_e)
-            self.assertSetEqual(set(v2), set(expected_v))
-            self.assertSetEqual(set(e2), set(expected_e))
+            assert set(v2) == set(expected_v)
+            assert set(e2) == set(expected_e)
 
     def test_dropIsolatedVertices(self):
         g = self.g
@@ -204,74 +226,93 @@ def test_dropIsolatedVertices(self):
         expected_e = [(2, 3, "follow")]
         assert len(v2) == len(expected_v)
         assert len(e2) == len(expected_e)
-        self.assertSetEqual(set(v2), set(expected_v))
-        self.assertSetEqual(set(e2), set(expected_e))
+        assert set(v2) == set(expected_v)
+        assert set(e2) == set(expected_e)
 
     def test_bfs(self):
         g = self.g
         paths = g.bfs("name='A'", "name='C'")
-        self.assertEqual(paths.count(), 1)
-        self.assertEqual(paths.select("v1.name").head()[0], "B")
+        assert paths.count() == 1
+        # Expecting that the first intermediary vertex in the BFS is "B"
+        assert paths.select("v1.name").head()[0] == "B"
+
         paths2 = g.bfs("name='A'", "name='C'", edgeFilter="action!='follow'")
-        self.assertEqual(paths2.count(), 0)
+        assert paths2.count() == 0
+
         paths3 = g.bfs("name='A'", "name='C'", maxPathLength=1)
-        self.assertEqual(paths3.count(), 0)
+        assert paths3.count() == 0
 
 
-class PregelTest(GraphFrameTestCase):
-    def setUp(self):
-        super(PregelTest, self).setUp()
+@pytest.mark.usefixtures("set_spark")
+class TestPregel:
 
     def test_page_rank(self):
-        from pyspark.sql.functions import coalesce, col, lit, sum, when
-        edges = self.spark.createDataFrame([[0, 1],
-                                          [1, 2],
-                                          [2, 4],
-                                          [2, 0],
-                                          [3, 4], # 3 has no in-links
-                                          [4, 0],
-                                          [4, 2]], ["src", "dst"])
+        # Create an edge DataFrame; note that vertex 3 has no in-links.
+        edges = self.spark.createDataFrame(
+            [[0, 1], [1, 2], [2, 4], [2, 0], [3, 4], [4, 0], [4, 2]],
+            ["src", "dst"],
+        )
         edges.cache()
+
+        # Create a vertex DataFrame and count vertices.
         vertices = self.spark.createDataFrame([[0], [1], [2], [3], [4]], ["id"])
         numVertices = vertices.count()
+
+        # Get the outDegrees DataFrame from a GraphFrame built on the original vertices and edges.
         vertices = GraphFrame(vertices, edges).outDegrees
         vertices.cache()
+
+        # Construct a new GraphFrame with the updated vertices DataFrame.
         graph = GraphFrame(vertices, edges)
         alpha = 0.15
-        ranks = graph.pregel \
-            .setMaxIter(5) \
-            .withVertexColumn("rank", lit(1.0 / numVertices),
-                              coalesce(Pregel.msg(),
-                                       lit(0.0)) * lit(1.0 - alpha) + lit(alpha / numVertices)) \
-            .sendMsgToDst(Pregel.src("rank") / Pregel.src("outDegree")) \
-            .aggMsgs(sum(Pregel.msg())) \
+
+        # Run PageRank via Pregel.
+        ranks = (
+            graph.pregel.setMaxIter(5)
+            .withVertexColumn(
+                "rank",
+                F.lit(1.0 / numVertices),
+                F.coalesce(Pregel.msg(), F.lit(0.0)) * F.lit(1.0 - alpha)
+                + F.lit(alpha / numVertices),
+            )
+            .sendMsgToDst(Pregel.src("rank") / Pregel.src("outDegree"))
+            .aggMsgs(F.sum(Pregel.msg()))
             .run()
+        )
+
+        # Collect and sort results.
         resultRows = ranks.sort(ranks.id).collect()
-        result = map(lambda x: x.rank, resultRows)
+        result = list(map(lambda x: x.rank, resultRows))
         expected = [0.245, 0.224, 0.303, 0.03, 0.197]
+
+        # Compare each result with its expected value using a tolerance of 1e-3.
         for a, b in zip(result, expected):
-            self.assertAlmostEqual(a, b, delta = 1e-3)
+            assert a == pytest.approx(b, abs=1e-3)
+
 
+@pytest.mark.usefixtures("set_spark")
+class TestGraphFrameLib:
 
-class GraphFrameLibTest(GraphFrameTestCase):
-    def setUp(self):
-        super(GraphFrameLibTest, self).setUp()
+    def setup_method(self, method):
+        # Set up the Java API instance for each test.
         self.japi = _java_api(self.spark._sc)
 
-    def _hasCols(self, graph, vcols = [], ecols = []):
-        map(lambda c: self.assertIn(c, graph.vertices.columns), vcols)
-        map(lambda c: self.assertIn(c, graph.edges.columns), ecols)
+    def _hasCols(self, graph, vcols=(), ecols=()):
+        for c in vcols:
+            assert c in graph.vertices.columns, f"Vertex DataFrame missing column: {c}"
+        for c in ecols:
+            assert c in graph.edges.columns, f"Edge DataFrame missing column: {c}"
 
-    def _df_hasCols(self, vertices, vcols = []):
-        map(lambda c: self.assertIn(c, vertices.columns), vcols)
+    def _df_hasCols(self, df, vcols=()):
+        for c in vcols:
+            assert c in df.columns, f"DataFrame missing column: {c}"
 
     def _graph(self, name, *args):
         """
-        Convenience to call one of the example graphs, passing the arguments and wrapping the result back
-        as a python object.
-        :param name: the name of the example graph
-        :param args: all the required arguments, without the initial spark session
-        :return:
+        Convenience to call one of the example graphs, passing the arguments and wrapping the result as a Python object.
+        :param name: the name of the example graph.
+        :param args: all the required arguments (excluding the initial SparkSession).
+        :return: a GraphFrame object.
         """
         examples = self.japi.examples()
         jgraph = getattr(examples, name)(*args)
@@ -281,83 +322,79 @@ def test_aggregate_messages(self):
         g = self._graph("friends")
         # For each user, sum the ages of the adjacent users,
         # plus 1 for the src's sum if the edge is "friend".
-        sendToSrc = (
-            AM.dst['age'] +
-            sqlfunctions.when(
-                AM.edge['relationship'] == 'friend',
-                sqlfunctions.lit(1)
-            ).otherwise(0))
-        sendToDst = AM.src['age']
+        sendToSrc = AM.dst["age"] + F.when(AM.edge["relationship"] == "friend", F.lit(1)).otherwise(
+            0
+        )
+        sendToDst = AM.src["age"]
         agg = g.aggregateMessages(
-            sqlfunctions.sum(AM.msg).alias('summedAges'),
-            sendToSrc=sendToSrc,
-            sendToDst=sendToDst)
-        # Run the aggregation again providing SQL expressions as String instead.
+            F.sum(AM.msg).alias("summedAges"), sendToSrc=sendToSrc, sendToDst=sendToDst
+        )
+        # Run the aggregation again using SQL expressions as Strings.
         agg2 = g.aggregateMessages(
             "sum(MSG) AS `summedAges`",
             sendToSrc="(dst['age'] + CASE WHEN (edge['relationship'] = 'friend') THEN 1 ELSE 0 END)",
-            sendToDst="src['age']")
-        # Convert agg and agg2 to a mapping from id to the aggregated message.
-        aggMap = {id_: s for id_, s in agg.select('id', 'summedAges').collect()}
-        agg2Map = {id_: s for id_, s in agg2.select('id', 'summedAges').collect()}
-        # Compute the truth via brute force.
-        user2age = {id_: age for id_, age in g.vertices.select('id', 'age').collect()}
+            sendToDst="src['age']",
+        )
+        # Build mappings from id to the aggregated message.
+        aggMap = {row.id: row.summedAges for row in agg.select("id", "summedAges").collect()}
+        agg2Map = {row.id: row.summedAges for row in agg2.select("id", "summedAges").collect()}
+        # Compute the expected aggregation via brute force.
+        user2age = {row.id: row.age for row in g.vertices.select("id", "age").collect()}
         trueAgg = {}
-        for src, dst, rel in g.edges.select("src", "dst", "relationship").collect():
-            trueAgg[src] = trueAgg.get(src, 0) + user2age[dst] + (1 if rel == 'friend' else 0)
+        for row in g.edges.select("src", "dst", "relationship").collect():
+            src, dst, rel = row.src, row.dst, row.relationship
+            trueAgg[src] = trueAgg.get(src, 0) + user2age[dst] + (1 if rel == "friend" else 0)
             trueAgg[dst] = trueAgg.get(dst, 0) + user2age[src]
-        # Compare if the agg mappings match the brute force mapping
-        self.assertEqual(aggMap, trueAgg)
-        self.assertEqual(agg2Map, trueAgg)
-        # Check that TypeError is raises with messages of wrong type
-        with self.assertRaises(TypeError):
+        # Verify both aggregations match the expected results.
+        assert aggMap == trueAgg, f"aggMap {aggMap} does not equal expected {trueAgg}"
+        assert agg2Map == trueAgg, f"agg2Map {agg2Map} does not equal expected {trueAgg}"
+        # Check that passing a wrong type for messages raises a TypeError.
+        with pytest.raises(TypeError):
             g.aggregateMessages(
-                "sum(MSG) AS `summedAges`",
-                sendToSrc=object(),
-                sendToDst="src['age']")
-        with self.assertRaises(TypeError):
+                "sum(MSG) AS `summedAges`", sendToSrc=object(), sendToDst="src['age']"
+            )
+        with pytest.raises(TypeError):
             g.aggregateMessages(
-                "sum(MSG) AS `summedAges`",
-                sendToSrc=dst['age'],
-                sendToDst=object())
+                "sum(MSG) AS `summedAges`", sendToSrc=F.col("dst")["age"], sendToDst=object()
+            )
 
     def test_connected_components(self):
-        v = self.spark.createDataFrame([
-        (0, "a", "b")], ["id", "vattr", "gender"])
+        v = self.spark.createDataFrame([(0, "a", "b")], ["id", "vattr", "gender"])
         e = self.spark.createDataFrame([(0, 0, 1)], ["src", "dst", "test"]).filter("src > 10")
         g = GraphFrame(v, e)
         comps = g.connectedComponents()
-        self._df_hasCols(comps, vcols=['id', 'component', 'vattr', 'gender'])
-        self.assertEqual(comps.count(), 1)
+        self._df_hasCols(comps, vcols=["id", "component", "vattr", "gender"])
+        assert comps.count() == 1
 
     def test_connected_components2(self):
         v = self.spark.createDataFrame([(0, "a0", "b0"), (1, "a1", "b1")], ["id", "A", "B"])
         e = self.spark.createDataFrame([(0, 1, "a01", "b01")], ["src", "dst", "A", "B"])
         g = GraphFrame(v, e)
         comps = g.connectedComponents()
-        self._df_hasCols(comps, vcols=['id', 'component', 'A', 'B'])
-        self.assertEqual(comps.count(), 2)
+        self._df_hasCols(comps, vcols=["id", "component", "A", "B"])
+        assert comps.count() == 2
 
     def test_connected_components_friends(self):
         g = self._graph("friends")
-        comps_tests = []
-        comps_tests += [g.connectedComponents()]
-        comps_tests += [g.connectedComponents(broadcastThreshold=1)]
-        comps_tests += [g.connectedComponents(checkpointInterval=0)]
-        comps_tests += [g.connectedComponents(checkpointInterval=10)]
-        comps_tests += [g.connectedComponents(algorithm="graphx")]
+        comps_tests = [
+            g.connectedComponents(),
+            g.connectedComponents(broadcastThreshold=1),
+            g.connectedComponents(checkpointInterval=0),
+            g.connectedComponents(checkpointInterval=10),
+            g.connectedComponents(algorithm="graphx"),
+        ]
         for c in comps_tests:
-            self.assertEqual(c.groupBy("component").count().count(), 2)
+            assert c.groupBy("component").count().count() == 2
 
     def test_label_progagation(self):
         n = 5
         g = self._graph("twoBlobs", n)
         labels = g.labelPropagation(maxIter=4 * n)
         labels1 = labels.filter("id < 5").select("label").collect()
-        all1 = set([x.label for x in labels1])
+        all1 = {row.label for row in labels1}
         assert len(all1) == 1
         labels2 = labels.filter("id >= 5").select("label").collect()
-        all2 = set([x.label for x in labels2])
+        all2 = {row.label for row in labels2}
         assert len(all2) == 1
         assert all1 != all2
 
@@ -367,7 +404,7 @@ def test_page_rank(self):
         resetProb = 0.15
         errorTol = 1.0e-5
         pr = g.pageRank(resetProb, tol=errorTol)
-        self._hasCols(pr, vcols=['id', 'pagerank'], ecols=['src', 'dst', 'weight'])
+        self._hasCols(pr, vcols=["id", "pagerank"], ecols=["src", "dst", "weight"])
 
     def test_parallel_personalized_page_rank(self):
         n = 100
@@ -376,31 +413,34 @@ def test_parallel_personalized_page_rank(self):
         maxIter = 15
         sourceIds = [1, 2, 3, 4]
         pr = g.parallelPersonalizedPageRank(resetProb, sourceIds=sourceIds, maxIter=maxIter)
-        self._hasCols(pr, vcols=['id', 'pageranks'], ecols=['src', 'dst', 'weight'])
+        self._hasCols(pr, vcols=["id", "pageranks"], ecols=["src", "dst", "weight"])
 
     def test_shortest_paths(self):
         edges = [(1, 2), (1, 5), (2, 3), (2, 5), (3, 4), (4, 5), (4, 6)]
+        # Create bidirectional edges.
         all_edges = [z for (a, b) in edges for z in [(a, b), (b, a)]]
-        edges = self.spark.createDataFrame(all_edges, ["src", "dst"])
+        edgesDF = self.spark.createDataFrame(all_edges, ["src", "dst"])
         vertices = self.spark.createDataFrame([(i,) for i in range(1, 7)], ["id"])
-        g = GraphFrame(vertices, edges)
+        g = GraphFrame(vertices, edgesDF)
         landmarks = [1, 4]
         v2 = g.shortestPaths(landmarks)
         self._df_hasCols(v2, vcols=["id", "distances"])
 
     def test_svd_plus_plus(self):
         g = self._graph("ALSSyntheticData")
-        (v2, cost) = g.svdPlusPlus()
-        self._df_hasCols(v2, vcols=['id', 'column1', 'column2', 'column3', 'column4'])
+        v2, cost = g.svdPlusPlus()
+        self._df_hasCols(v2, vcols=["id", "column1", "column2", "column3", "column4"])
 
     def test_strongly_connected_components(self):
-        # Simple island test
+        # Simple island test.
         vertices = self.spark.createDataFrame([(i,) for i in range(1, 6)], ["id"])
         edges = self.spark.createDataFrame([(7, 8)], ["src", "dst"])
         g = GraphFrame(vertices, edges)
         c = g.stronglyConnectedComponents(5)
         for row in c.collect():
-            self.assertEqual(row.id, row.component)
+            assert (
+                row.id == row.component
+            ), f"Vertex {row.id} not equal to its component {row.component}"
 
     def test_triangle_counts(self):
         edges = self.spark.createDataFrame([(0, 1), (1, 2), (2, 0)], ["src", "dst"])
@@ -408,61 +448,66 @@ def test_triangle_counts(self):
         g = GraphFrame(vertices, edges)
         c = g.triangleCount()
         for row in c.select("id", "count").collect():
-            self.assertEqual(row.asDict()['count'], 1)
-            
+            assert row.asDict()["count"] == 1, f"Triangle count for vertex {row.id} is not 1"
+
     def test_mutithreaded_sparksession_usage(self):
-        # Test that we can use the GraphFrame API from multiple threads
+        # Test that the GraphFrame API works correctly from multiple threads.
         localVertices = [(1, "A"), (2, "B"), (3, "C")]
         localEdges = [(1, 2, "love"), (2, 1, "hate"), (2, 3, "follow")]
         v = self.spark.createDataFrame(localVertices, ["id", "name"])
         e = self.spark.createDataFrame(localEdges, ["src", "dst", "action"])
-        
-        
+
         exc = None
+
         def run_graphframe() -> None:
+            nonlocal exc
             try:
                 GraphFrame(v, e)
             except Exception as _e:
-                nonlocal exc
                 exc = _e
-        
+
         import threading
+
         thread = threading.Thread(target=run_graphframe)
         thread.start()
         thread.join()
-        self.assertIsNone(exc, f"Exception was raised in thread: {exc}")
+        assert exc is None, f"Exception was raised in thread: {exc}"
+
 
+@pytest.mark.usefixtures("set_spark")
+class TestGraphFrameExamples:
 
-class GraphFrameExamplesTest(GraphFrameTestCase):
-    def setUp(self):
-        super(GraphFrameExamplesTest, self).setUp()
+    def setup_method(self, method):
+        # Set up the Java API instance for use in the tests.
         self.japi = _java_api(self.spark._sc)
 
     def test_belief_propagation(self):
-        # create graphical model g of size 3 x 3
+        # Create a graphical model g of size 3x3.
         g = Graphs(self.spark).gridIsingModel(3)
-        # run BP for 5 iterations
+        # Run Belief Propagation (BP) for 5 iterations.
         numIter = 5
         results = BeliefPropagation.runBPwithGraphFrames(g, numIter)
-        # check beliefs are valid
-        for row in results.vertices.select('belief').collect():
-            belief = row['belief']
-            self.assertTrue(
-                0 <= belief <= 1,
-                msg="Expected belief to be probability in [0,1], but found {}".format(belief))
+        # Check that each belief is a valid probability in [0, 1].
+        for row in results.vertices.select("belief").collect():
+            belief = row["belief"]
+            assert (
+                0 <= belief <= 1
+            ), f"Expected belief to be probability in [0,1], but found {belief}"
 
     def test_graph_friends(self):
-        # construct graph
+        # Construct the graph.
         g = Graphs(self.spark).friends()
-        # check that a GraphFrame instance was returned
-        self.assertIsInstance(g, GraphFrame)
+        # Check that the result is an instance of GraphFrame.
+        assert isinstance(g, GraphFrame)
 
     def test_graph_grid_ising_model(self):
-        # construct graph
+        # Construct a grid Ising model graph.
         n = 3
         g = Graphs(self.spark).gridIsingModel(n)
-        # check that all the vertices exist
-        ids = [v['id'] for v in g.vertices.collect()]
+        # Collect the vertex ids.
+        ids = [v["id"] for v in g.vertices.collect()]
+        # Verify that every expected vertex id appears.
         for i in range(n):
             for j in range(n):
-                self.assertIn('{},{}'.format(i, j), ids)
+                expected_id = f"{i},{j}"
+                assert expected_id in ids, f"Vertex {expected_id} not found in {ids}"
diff --git a/python/requirements-dev.txt b/python/requirements-dev.txt
new file mode 100644
index 000000000..b27da4d73
--- /dev/null
+++ b/python/requirements-dev.txt
@@ -0,0 +1,6 @@
+pytest==8.3.4
+Sphinx==8.1.3
+flake8==7.1.1
+isort==6.0.0
+mypy==1.14.1
+pre-commit==4.0.1
diff --git a/python/requirements.txt b/python/requirements.txt
index efb5ec378..fb73319f2 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -1,3 +1,6 @@
 # This file should list any python package dependencies.
-nose==1.3.7
+pyspark>=2.0.0
+click==8.1.8
 numpy>=1.7
+py7zr==0.22.0
+requests==2.32.3
diff --git a/python/run-tests.sh b/python/run-tests.sh
index af4e0a139..dc496e8b0 100755
--- a/python/run-tests.sh
+++ b/python/run-tests.sh
@@ -38,7 +38,7 @@ echo $pyver
 
 LIBS=""
 for lib in "$SPARK_HOME/python/lib"/*zip ; do
-  LIBS=$LIBS:$lib
+    LIBS=$LIBS:$lib
 done
 
 # The current directory of the script.
@@ -51,7 +51,7 @@ assembly_path="$DIR/../target/scala-$scala_version_major_minor"
 echo `ls $assembly_path/graphframes-assembly*.jar`
 JAR_PATH=""
 for assembly in $assembly_path/graphframes-assembly*.jar ; do
-  JAR_PATH=$assembly
+    JAR_PATH=$assembly
 done
 
 export PYSPARK_SUBMIT_ARGS="--driver-memory 2g --executor-memory 2g --jars $JAR_PATH pyspark-shell "
@@ -62,17 +62,7 @@ export PYTHONPATH=$PYTHONPATH:graphframes
 
 
 # Run test suites
-
-if [[ "$python_major" == "2" ]]; then
-
-  # Horrible hack for spark 1.x: we manually remove some log lines to stay below the 4MB log limit on Travis.
-  $PYSPARK_DRIVER_PYTHON `which nosetests` -v --all-modules -w $DIR 2>&1 | grep -vE "INFO (ParquetOutputFormat|SparkContext|ContextCleaner|ShuffleBlockFetcherIterator|MapOutputTrackerMaster|TaskSetManager|Executor|MemoryStore|CacheManager|BlockManager|DAGScheduler|PythonRDD|TaskSchedulerImpl|ZippedPartitionsRDD2)";
-
-else
-
-  $PYSPARK_DRIVER_PYTHON -m "nose" -v --all-modules -w $DIR 2>&1 | grep -vE "INFO (ParquetOutputFormat|SparkContext|ContextCleaner|ShuffleBlockFetcherIterator|MapOutputTrackerMaster|TaskSetManager|Executor|MemoryStore|CacheManager|BlockManager|DAGScheduler|PythonRDD|TaskSchedulerImpl|ZippedPartitionsRDD2)";
-
-fi
+$PYSPARK_DRIVER_PYTHON -m "pytest" -v $DIR/graphframes/tests.py 2>&1 | grep -vE "INFO (ParquetOutputFormat|SparkContext|ContextCleaner|ShuffleBlockFetcherIterator|MapOutputTrackerMaster|TaskSetManager|Executor|MemoryStore|CacheManager|BlockManager|DAGScheduler|PythonRDD|TaskSchedulerImpl|ZippedPartitionsRDD2)";
 
 # Exit immediately if the tests fail.
 # Since we pipe to remove the output, we need to use some horrible BASH features:
@@ -80,7 +70,6 @@ fi
 test ${PIPESTATUS[0]} -eq 0 || exit 1;
 
 # Run doc tests
-
 cd "$DIR"
 
 $PYSPARK_PYTHON -u ./graphframes/graphframe.py "$@"
diff --git a/python/setup.cfg b/python/setup.cfg
index f127b08af..02a0d5136 100644
--- a/python/setup.cfg
+++ b/python/setup.cfg
@@ -1,2 +1,42 @@
-# This file contains the default option values to be used during setup. An
-# example can be found at https://github.com/pypa/sampleproject/blob/master/setup.cfg
+[metadata]
+name = graphframes
+version = 0.8.5
+description = GraphFrames: Graph Processing Framework for Apache Spark
+long_description = file: ../README.md
+long_description_content_type = text/markdown
+author = GraphFrames Contributors
+author_email = graphframes@googlegroups.com
+url = https://pypi.org/project/graphframes-py/
+license = Apache License 2.0
+classifiers =
+    Development Status :: 4 - Beta
+    Programming Language :: Python :: 3
+    Operating System :: OS Independent
+
+[options]
+packages = find:
+package_dir =
+    = python
+include_package_data = True
+install_requires =
+    pyspark>=2.0.0
+    click==8.1.8
+    numpy>=1.7
+    py7zr==0.22.0
+    requests==2.32.3
+
+[options.packages.find]
+where = python
+    exclude =
+        tests.py
+        docs
+
+[options.extras_require]
+dev =
+    pytest==8.3.4
+    Sphinx==8.1.3
+    black==25.1.0
+    flake8==7.1.1
+    isort==6.0.0
+    mypy==1.14.1
+    pre-commit==3.5.1
diff --git a/python/setup.py b/python/setup.py
index 9dad5462e..a91fb629a 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -1,2 +1,35 @@
-# Your python setup file. An example can be found at:
-# https://github.com/pypa/sampleproject/blob/master/setup.py
+from setuptools import setup, find_packages  # type: ignore
+import os
+
+
+def parse_requirements(filename):
+    """Load requirements from a pip requirements file."""
+    with open(filename, encoding="utf-8") as f:
+        # Filter out comments and empty lines.
+        return [line.strip() for line in f if line.strip() and not line.startswith("#")]
+
+
+# Read the long description from the README file.
+here = os.path.abspath(os.path.dirname(__file__))
+
+# Use requirements.txt to get the list of dependencies.
+requirements = parse_requirements(os.path.join(here, "requirements.txt"))
+
+setup(
+    name="graphframes",
+    version=open("version.sbt").read().strip(),  # Update this version as needed
+    description="GraphFrames: Graph Processing Framework for Apache Spark",
+    long_description=open(os.path.join(f"{here}/..", "README.md"), encoding="utf-8").read(),
+    long_description_content_type="text/markdown",
+    author="GraphFrames Contributors",
+    author_email="graphframes@googlegroups.com",
+    url="https://pypi.org/project/graphframes-py",
+    packages=find_packages(where="python"),
+    package_dir={"": "python"},
+    include_package_data=True,  # Include non-code files specified in MANIFEST.in
+    install_requires=requirements,
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "Operating System :: OS Independent",
+    ],
+)
diff --git a/version.sbt b/version.sbt
index f72bdcc0e..6fbb590a4 100644
--- a/version.sbt
+++ b/version.sbt
@@ -1 +1 @@
-ThisBuild / version := "0.8.4"
+ThisBuild / version := "0.8.5"

From c25624474e261d73c1eeb12d35b2604ef6e977cf Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sat, 15 Feb 2025 21:07:24 -0800
Subject: [PATCH 02/53] Restore Python .gitignore

---
 python/.gitignore | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 python/.gitignore

diff --git a/python/.gitignore b/python/.gitignore
new file mode 100644
index 000000000..2130ff922
--- /dev/null
+++ b/python/.gitignore
@@ -0,0 +1,5 @@
+*.pyc
+docs/_build/
+build/
+dist/
+

From 6c3df0b1cdf606ddf8e1aed00edd1d93ffb11220 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sat, 15 Feb 2025 21:08:05 -0800
Subject: [PATCH 03/53] Extra newline removed

---
 python/.gitignore | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/.gitignore b/python/.gitignore
index 2130ff922..81410ca55 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -2,4 +2,3 @@
 docs/_build/
 build/
 dist/
-

From caf50911ed5da315ca66134798a28ea240b71a81 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 11:58:23 -0800
Subject: [PATCH 04/53] Added VERSION file set to 0.8.5

---
 VERSION | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 VERSION

diff --git a/VERSION b/VERSION
new file mode 100644
index 000000000..7ada0d303
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.8.5

From 7cfa2d18152e566f39dc57ea5c8e1c6075648542 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 12:40:44 -0800
Subject: [PATCH 05/53] isort; fix edgesDF variable name.

---
 python/graphframes/tests.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/python/graphframes/tests.py b/python/graphframes/tests.py
index 259435759..d4269f449 100644
--- a/python/graphframes/tests.py
+++ b/python/graphframes/tests.py
@@ -16,18 +16,18 @@
 #
 
 import os
-import tempfile
-import shutil
 import re
+import shutil
+import tempfile
 
 import pytest
 from pyspark import SparkConf, SparkContext
-from pyspark.sql import functions as F, SparkSession
+from pyspark.sql import SparkSession
+from pyspark.sql import functions as F
 
-from .graphframe import GraphFrame, Pregel, _java_api, _from_java_gf
+from .examples import BeliefPropagation, Graphs
+from .graphframe import GraphFrame, Pregel, _from_java_gf, _java_api
 from .lib import AggregateMessages as AM
-from .examples import Graphs, BeliefPropagation
-
 
 VERSION = open("version.sbt").read().strip()
 
@@ -419,9 +419,9 @@ def test_shortest_paths(self):
         edges = [(1, 2), (1, 5), (2, 3), (2, 5), (3, 4), (4, 5), (4, 6)]
         # Create bidirectional edges.
         all_edges = [z for (a, b) in edges for z in [(a, b), (b, a)]]
-        edgesDF = self.spark.createDataFrame(all_edges, ["src", "dst"])
+        edges = self.spark.createDataFrame(all_edges, ["src", "dst"])
         vertices = self.spark.createDataFrame([(i,) for i in range(1, 7)], ["id"])
-        g = GraphFrame(vertices, edgesDF)
+        g = GraphFrame(vertices, edges)
         landmarks = [1, 4]
         v2 = g.shortestPaths(landmarks)
         self._df_hasCols(v2, vcols=["id", "distances"])

From a8bf0be4523bc41dd01966f7b525a9ec7c918ede Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 13:08:48 -0800
Subject: [PATCH 06/53] Back out Dockerfile changes

---
 Dockerfile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index b9fe8c528..1c4430912 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,16 +1,16 @@
 FROM ubuntu:22.04
 
-ARG PYTHON_VERSION=3.9
+ARG PYTHON_VERSION=3.8
 ARG DEBIAN_FRONTEND=noninteractive
 
 RUN apt-get update && \
-    apt-get install -y wget bzip2 build-essential openjdk-11-jdk ssh sudo && \
+    apt-get install -y wget bzip2 build-essential openjdk-8-jdk ssh sudo && \
     apt-get clean
 
 # Install Spark and update env variables.
-ENV SCALA_VERSION 2.12.20
-ENV SPARK_VERSION "3.5.4"
-ENV SPARK_BUILD "spark-${SPARK_VERSION}-bin-hadoop3"
+ENV SCALA_VERSION 2.12.17
+ENV SPARK_VERSION "3.4.1"
+ENV SPARK_BUILD "spark-${SPARK_VERSION}-bin-hadoop3.2"
 ENV SPARK_BUILD_URL "https://dist.apache.org/repos/dist/release/spark/spark-${SPARK_VERSION}/${SPARK_BUILD}.tgz"
 RUN wget --quiet "$SPARK_BUILD_URL" -O /tmp/spark.tgz && \
     tar -C /opt -xf /tmp/spark.tgz && \

From 54a942da4a572471eead3e5df4f721096d638ca6 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 13:21:47 -0800
Subject: [PATCH 07/53] Back out version change in build.sbt

---
 build.sbt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build.sbt b/build.sbt
index c6b503989..4ee4d9bd5 100644
--- a/build.sbt
+++ b/build.sbt
@@ -3,7 +3,7 @@ import ReleaseTransformations.*
 lazy val sparkVer = sys.props.getOrElse("spark.version", "3.5.4")
 lazy val sparkBranch = sparkVer.substring(0, 3)
 lazy val defaultScalaVer = sparkBranch match {
-  case "3.5" => "2.12.20"
+  case "3.5" => "2.12.18"
   case _ => throw new IllegalArgumentException(s"Unsupported Spark version: $sparkVer.")
 }
 lazy val scalaVer = sys.props.getOrElse("scala.version", defaultScalaVer)

From 8b0e34697928ad0cb1b07cba96c0d0657a0d771b Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 13:23:35 -0800
Subject: [PATCH 08/53] Back out changes to config and run-tests

---
 docs/_config.yml    |  2 +-
 python/run-tests.sh | 17 ++++++++++++++---
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/docs/_config.yml b/docs/_config.yml
index 379fc242f..4c1ab075c 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -13,7 +13,7 @@ include:
 
 # These allow the documentation to be updated with newer releases
 # of Spark, Scala, and Mesos.
-GRAPHFRAMES_VERSION: 0.8.5
+GRAPHFRAMES_VERSION: 0.8.4
 #SCALA_BINARY_VERSION: "2.10"
 #SCALA_VERSION: "2.10.4"
 #MESOS_VERSION: 0.21.0
diff --git a/python/run-tests.sh b/python/run-tests.sh
index dc496e8b0..af4e0a139 100755
--- a/python/run-tests.sh
+++ b/python/run-tests.sh
@@ -38,7 +38,7 @@ echo $pyver
 
 LIBS=""
 for lib in "$SPARK_HOME/python/lib"/*zip ; do
-    LIBS=$LIBS:$lib
+  LIBS=$LIBS:$lib
 done
 
 # The current directory of the script.
@@ -51,7 +51,7 @@ assembly_path="$DIR/../target/scala-$scala_version_major_minor"
 echo `ls $assembly_path/graphframes-assembly*.jar`
 JAR_PATH=""
 for assembly in $assembly_path/graphframes-assembly*.jar ; do
-    JAR_PATH=$assembly
+  JAR_PATH=$assembly
 done
 
 export PYSPARK_SUBMIT_ARGS="--driver-memory 2g --executor-memory 2g --jars $JAR_PATH pyspark-shell "
@@ -62,7 +62,17 @@ export PYTHONPATH=$PYTHONPATH:graphframes
 
 
 # Run test suites
-$PYSPARK_DRIVER_PYTHON -m "pytest" -v $DIR/graphframes/tests.py 2>&1 | grep -vE "INFO (ParquetOutputFormat|SparkContext|ContextCleaner|ShuffleBlockFetcherIterator|MapOutputTrackerMaster|TaskSetManager|Executor|MemoryStore|CacheManager|BlockManager|DAGScheduler|PythonRDD|TaskSchedulerImpl|ZippedPartitionsRDD2)";
+
+if [[ "$python_major" == "2" ]]; then
+
+  # Horrible hack for spark 1.x: we manually remove some log lines to stay below the 4MB log limit on Travis.
+  $PYSPARK_DRIVER_PYTHON `which nosetests` -v --all-modules -w $DIR 2>&1 | grep -vE "INFO (ParquetOutputFormat|SparkContext|ContextCleaner|ShuffleBlockFetcherIterator|MapOutputTrackerMaster|TaskSetManager|Executor|MemoryStore|CacheManager|BlockManager|DAGScheduler|PythonRDD|TaskSchedulerImpl|ZippedPartitionsRDD2)";
+
+else
+
+  $PYSPARK_DRIVER_PYTHON -m "nose" -v --all-modules -w $DIR 2>&1 | grep -vE "INFO (ParquetOutputFormat|SparkContext|ContextCleaner|ShuffleBlockFetcherIterator|MapOutputTrackerMaster|TaskSetManager|Executor|MemoryStore|CacheManager|BlockManager|DAGScheduler|PythonRDD|TaskSchedulerImpl|ZippedPartitionsRDD2)";
+
+fi
 
 # Exit immediately if the tests fail.
 # Since we pipe to remove the output, we need to use some horrible BASH features:
@@ -70,6 +80,7 @@ $PYSPARK_DRIVER_PYTHON -m "pytest" -v $DIR/graphframes/tests.py 2>&1 | grep -vE
 test ${PIPESTATUS[0]} -eq 0 || exit 1;
 
 # Run doc tests
+
 cd "$DIR"
 
 $PYSPARK_PYTHON -u ./graphframes/graphframe.py "$@"

From 46c2b9300ace8c95ba482e6582631b79a348705c Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 13:24:19 -0800
Subject: [PATCH 09/53] Back out pytest conversion

---
 python/graphframes/tests.py | 409 ++++++++++++++++--------------------
 1 file changed, 182 insertions(+), 227 deletions(-)

diff --git a/python/graphframes/tests.py b/python/graphframes/tests.py
index d4269f449..9a7ad1371 100644
--- a/python/graphframes/tests.py
+++ b/python/graphframes/tests.py
@@ -15,72 +15,63 @@
 # limitations under the License.
 #
 
-import os
-import re
-import shutil
+import sys
 import tempfile
+import shutil
+import re
 
-import pytest
-from pyspark import SparkConf, SparkContext
-from pyspark.sql import SparkSession
-from pyspark.sql import functions as F
-
-from .examples import BeliefPropagation, Graphs
-from .graphframe import GraphFrame, Pregel, _from_java_gf, _java_api
-from .lib import AggregateMessages as AM
-
-VERSION = open("version.sbt").read().strip()
-
+if sys.version_info[:2] <= (2, 6):
+    try:
+        import unittest2 as unittest
+    except ImportError:
+        sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier')
+        sys.exit(1)
+else:
+    import unittest
 
-@pytest.fixture(scope="class", autouse=True)
-def set_spark(request, spark_session):
-    request.cls.spark = spark_session
+from pyspark import SparkContext
+from pyspark.sql import functions as sqlfunctions, SparkSession
 
+from .graphframe import GraphFrame, Pregel, _java_api, _from_java_gf
+from .lib import AggregateMessages as AM
+from .examples import Graphs, BeliefPropagation
 
-@pytest.mark.usefixtures("set_spark")
 class GraphFrameTestUtils(object):
 
     @classmethod
     def parse_spark_version(cls, version_str):
-        """take an input version string
-        return version items in a dictionary
+        """ take an input version string
+            return version items in a dictionary
         """
-        _sc_ver_patt = r"(\d+)\.(\d+)(\.(\d+)(-(.+))?)?"
+        _sc_ver_patt = r'(\d+)\.(\d+)(\.(\d+)(-(.+))?)?'
         m = re.match(_sc_ver_patt, version_str)
         if not m:
-            raise TypeError(
-                "version {} shoud be in <major>.<minor>.<maintenance>".format(version_str)
-            )
+            raise TypeError("version {} shoud be in <major>.<minor>.<maintenance>".format(version_str))
         version_info = {}
         try:
-            version_info["major"] = int(m.group(1))
+            version_info['major'] = int(m.group(1))
         except:
             raise TypeError("invalid minor version")
         try:
-            version_info["minor"] = int(m.group(2))
+            version_info['minor'] = int(m.group(2))
         except:
             raise TypeError("invalid major version")
         try:
-            version_info["maintenance"] = int(m.group(4))
+            version_info['maintenance'] = int(m.group(4))
         except:
-            version_info["maintenance"] = 0
+            version_info['maintenance'] = 0
         try:
-            version_info["special"] = m.group(6)
+            version_info['special'] = m.group(6)
         except:
             pass
         return version_info
 
     @classmethod
     def createSparkContext(cls):
-        cls.conf = SparkConf().setAppName("GraphFramesTests")
-        cls.conf.set(
-            "spark.submit.pyFiles",
-            os.path.abspath("python/dist/graphframes-{VERSION}-py3-none-any.whl"),
-        )
-        cls.sc = SparkContext(master="local[4]", appName="GraphFramesTests", conf=cls.conf)
+        cls.sc = sc = SparkContext('local[4]', "GraphFramesTests")
         cls.checkpointDir = tempfile.mkdtemp()
         cls.sc.setCheckpointDir(cls.checkpointDir)
-        cls.spark_version = cls.parse_spark_version(cls.sc.version)
+        cls.spark_version = cls.parse_spark_version(sc.version)
 
     @classmethod
     def stopSparkContext(cls):
@@ -90,10 +81,10 @@ def stopSparkContext(cls):
 
     @classmethod
     def spark_at_least_of_version(cls, version_str):
-        assert hasattr(cls, "spark_version")
+        assert hasattr(cls, 'spark_version')
         required_version = cls.parse_spark_version(version_str)
         spark_version = cls.spark_version
-        for _name in ["major", "minor", "maintenance"]:
+        for _name in ['major', 'minor', 'maintenance']:
             sc_ver = spark_version[_name]
             req_ver = required_version[_name]
             if sc_ver != req_ver:
@@ -101,31 +92,28 @@ def spark_at_least_of_version(cls, version_str):
         # All major.minor.maintenance equal
         return True
 
-
-@pytest.fixture(scope="module", autouse=True)
-def spark_context():
+def setUpModule():
     GraphFrameTestUtils.createSparkContext()
-    yield
+
+def tearDownModule():
     GraphFrameTestUtils.stopSparkContext()
 
 
-@pytest.fixture(scope="class")
-def spark_session():
-    # Create a SparkSession with a smaller number of shuffle partitions.
-    spark = (
-        SparkSession(GraphFrameTestUtils.sc)
-        .builder.config("spark.sql.shuffle.partitions", 4)
-        .getOrCreate()
-    )
-    yield spark
-    # No explicit stop; SparkContext shutdown will clean up.
+class GraphFrameTestCase(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        # Small tests run much faster with spark.sql.shuffle.partitions = 4
+        cls.spark = SparkSession(GraphFrameTestUtils.sc).builder.config('spark.sql.shuffle.partitions', 4).getOrCreate()
 
+    @classmethod
+    def tearDownClass(cls):
+        cls.spark = None
 
-@pytest.mark.usefixtures("set_spark")
-class GraphFrameTest:
 
-    def setup_method(self, method):
-        # Mimic setUp: create a simple GraphFrame instance for each test.
+class GraphFrameTest(GraphFrameTestCase):
+    def setUp(self):
+        super(GraphFrameTest, self).setUp()
         localVertices = [(1, "A"), (2, "B"), (3, "C")]
         localEdges = [(1, 2, "love"), (2, 1, "hate"), (2, 3, "follow")]
         v = self.spark.createDataFrame(localVertices, ["id", "name"])
@@ -135,38 +123,28 @@ def setup_method(self, method):
     def test_spark_version_check(self):
         gtu = GraphFrameTestUtils
         gtu.spark_version = gtu.parse_spark_version("2.0.2")
-
-        assert gtu.spark_at_least_of_version("1.7")
-        assert gtu.spark_at_least_of_version("2.0")
-        assert gtu.spark_at_least_of_version("2.0.1")
-        assert gtu.spark_at_least_of_version("2.0.2")
-        assert not gtu.spark_at_least_of_version("2.0.3")
-        assert not gtu.spark_at_least_of_version("2.1")
+        self.assertTrue(gtu.spark_at_least_of_version("1.7"))
+        self.assertTrue(gtu.spark_at_least_of_version("2.0"))
+        self.assertTrue(gtu.spark_at_least_of_version("2.0.1"))
+        self.assertTrue(gtu.spark_at_least_of_version("2.0.2"))
+        self.assertFalse(gtu.spark_at_least_of_version("2.0.3"))
+        self.assertFalse(gtu.spark_at_least_of_version("2.1"))
 
     def test_construction(self):
         g = self.g
-        vertexIDs = [row[0] for row in g.vertices.select("id").collect()]
+        vertexIDs = map(lambda x: x[0], g.vertices.select("id").collect())
         assert sorted(vertexIDs) == [1, 2, 3]
-
-        edgeActions = [row[0] for row in g.edges.select("action").collect()]
+        edgeActions = map(lambda x: x[0], g.edges.select("action").collect())
         assert sorted(edgeActions) == ["follow", "hate", "love"]
-
-        tripletsFirst = list(
-            map(
-                lambda x: (x[0][1], x[1][1], x[2][2]),
-                g.triplets.sort("src.id").select("src", "dst", "edge").take(1),
-            )
-        )
+        tripletsFirst = list(map(lambda x: (x[0][1], x[1][1], x[2][2]),
+                            g.triplets.sort("src.id").select("src", "dst", "edge").take(1)))
         assert tripletsFirst == [("A", "B", "love")], tripletsFirst
-
         # Try with invalid vertices and edges DataFrames
         v_invalid = self.spark.createDataFrame(
-            [(1, "A"), (2, "B"), (3, "C")], ["invalid_colname_1", "invalid_colname_2"]
-        )
+            [(1, "A"), (2, "B"), (3, "C")], ["invalid_colname_1", "invalid_colname_2"])
         e_invalid = self.spark.createDataFrame(
-            [(1, 2), (2, 3), (3, 1)], ["invalid_colname_3", "invalid_colname_4"]
-        )
-        with pytest.raises(ValueError):
+            [(1, 2), (2, 3), (3, 1)], ["invalid_colname_3", "invalid_colname_4"])
+        with self.assertRaises(ValueError):
             GraphFrame(v_invalid, e_invalid)
 
     def test_cache(self):
@@ -177,17 +155,17 @@ def test_cache(self):
     def test_degrees(self):
         g = self.g
         outDeg = g.outDegrees
-        assert set(outDeg.columns) == {"id", "outDegree"}
+        self.assertSetEqual(set(outDeg.columns), {"id", "outDegree"})
         inDeg = g.inDegrees
-        assert set(inDeg.columns) == {"id", "inDegree"}
+        self.assertSetEqual(set(inDeg.columns), {"id", "inDegree"})
         deg = g.degrees
-        assert set(deg.columns) == {"id", "degree"}
+        self.assertSetEqual(set(deg.columns), {"id", "degree"})
 
     def test_motif_finding(self):
         g = self.g
         motifs = g.find("(a)-[e]->(b)")
         assert motifs.count() == 3
-        assert set(motifs.columns) == {"a", "e", "b"}
+        self.assertSetEqual(set(motifs.columns), {"a", "e", "b"})
 
     def test_filterVertices(self):
         g = self.g
@@ -200,8 +178,8 @@ def test_filterVertices(self):
             e2 = g2.edges.select("src", "dst", "action").collect()
             assert len(v2) == len(expected_v)
             assert len(e2) == len(expected_e)
-            assert set(v2) == set(expected_v)
-            assert set(e2) == set(expected_e)
+            self.assertSetEqual(set(v2), set(expected_v))
+            self.assertSetEqual(set(e2), set(expected_e))
 
     def test_filterEdges(self):
         g = self.g
@@ -214,8 +192,8 @@ def test_filterEdges(self):
             e2 = g2.edges.select("src", "dst", "action").collect()
             assert len(v2) == len(expected_v)
             assert len(e2) == len(expected_e)
-            assert set(v2) == set(expected_v)
-            assert set(e2) == set(expected_e)
+            self.assertSetEqual(set(v2), set(expected_v))
+            self.assertSetEqual(set(e2), set(expected_e))
 
     def test_dropIsolatedVertices(self):
         g = self.g
@@ -226,93 +204,74 @@ def test_dropIsolatedVertices(self):
         expected_e = [(2, 3, "follow")]
         assert len(v2) == len(expected_v)
         assert len(e2) == len(expected_e)
-        assert set(v2) == set(expected_v)
-        assert set(e2) == set(expected_e)
+        self.assertSetEqual(set(v2), set(expected_v))
+        self.assertSetEqual(set(e2), set(expected_e))
 
     def test_bfs(self):
         g = self.g
         paths = g.bfs("name='A'", "name='C'")
-        assert paths.count() == 1
-        # Expecting that the first intermediary vertex in the BFS is "B"
-        assert paths.select("v1.name").head()[0] == "B"
-
+        self.assertEqual(paths.count(), 1)
+        self.assertEqual(paths.select("v1.name").head()[0], "B")
         paths2 = g.bfs("name='A'", "name='C'", edgeFilter="action!='follow'")
-        assert paths2.count() == 0
-
+        self.assertEqual(paths2.count(), 0)
         paths3 = g.bfs("name='A'", "name='C'", maxPathLength=1)
-        assert paths3.count() == 0
+        self.assertEqual(paths3.count(), 0)
 
 
-@pytest.mark.usefixtures("set_spark")
-class TestPregel:
+class PregelTest(GraphFrameTestCase):
+    def setUp(self):
+        super(PregelTest, self).setUp()
 
     def test_page_rank(self):
-        # Create an edge DataFrame; note that vertex 3 has no in-links.
-        edges = self.spark.createDataFrame(
-            [[0, 1], [1, 2], [2, 4], [2, 0], [3, 4], [4, 0], [4, 2]],
-            ["src", "dst"],
-        )
+        from pyspark.sql.functions import coalesce, col, lit, sum, when
+        edges = self.spark.createDataFrame([[0, 1],
+                                          [1, 2],
+                                          [2, 4],
+                                          [2, 0],
+                                          [3, 4], # 3 has no in-links
+                                          [4, 0],
+                                          [4, 2]], ["src", "dst"])
         edges.cache()
-
-        # Create a vertex DataFrame and count vertices.
         vertices = self.spark.createDataFrame([[0], [1], [2], [3], [4]], ["id"])
         numVertices = vertices.count()
-
-        # Get the outDegrees DataFrame from a GraphFrame built on the original vertices and edges.
         vertices = GraphFrame(vertices, edges).outDegrees
         vertices.cache()
-
-        # Construct a new GraphFrame with the updated vertices DataFrame.
         graph = GraphFrame(vertices, edges)
         alpha = 0.15
-
-        # Run PageRank via Pregel.
-        ranks = (
-            graph.pregel.setMaxIter(5)
-            .withVertexColumn(
-                "rank",
-                F.lit(1.0 / numVertices),
-                F.coalesce(Pregel.msg(), F.lit(0.0)) * F.lit(1.0 - alpha)
-                + F.lit(alpha / numVertices),
-            )
-            .sendMsgToDst(Pregel.src("rank") / Pregel.src("outDegree"))
-            .aggMsgs(F.sum(Pregel.msg()))
+        ranks = graph.pregel \
+            .setMaxIter(5) \
+            .withVertexColumn("rank", lit(1.0 / numVertices),
+                              coalesce(Pregel.msg(),
+                                       lit(0.0)) * lit(1.0 - alpha) + lit(alpha / numVertices)) \
+            .sendMsgToDst(Pregel.src("rank") / Pregel.src("outDegree")) \
+            .aggMsgs(sum(Pregel.msg())) \
             .run()
-        )
-
-        # Collect and sort results.
         resultRows = ranks.sort(ranks.id).collect()
-        result = list(map(lambda x: x.rank, resultRows))
+        result = map(lambda x: x.rank, resultRows)
         expected = [0.245, 0.224, 0.303, 0.03, 0.197]
-
-        # Compare each result with its expected value using a tolerance of 1e-3.
         for a, b in zip(result, expected):
-            assert a == pytest.approx(b, abs=1e-3)
-
+            self.assertAlmostEqual(a, b, delta = 1e-3)
 
-@pytest.mark.usefixtures("set_spark")
-class TestGraphFrameLib:
 
-    def setup_method(self, method):
-        # Set up the Java API instance for each test.
+class GraphFrameLibTest(GraphFrameTestCase):
+    def setUp(self):
+        super(GraphFrameLibTest, self).setUp()
         self.japi = _java_api(self.spark._sc)
 
-    def _hasCols(self, graph, vcols=[], ecols=[]):
-        for c in vcols:
-            assert c in graph.vertices.columns, f"Vertex DataFrame missing column: {c}"
-        for c in ecols:
-            assert c in graph.edges.columns, f"Edge DataFrame missing column: {c}"
+    def _hasCols(self, graph, vcols = [], ecols = []):
+        map(lambda c: self.assertIn(c, graph.vertices.columns), vcols)
+        map(lambda c: self.assertIn(c, graph.edges.columns), ecols)
 
-    def _df_hasCols(self, df, vcols=[]):
-        for c in vcols:
-            assert c in df.columns, f"DataFrame missing column: {c}"
+    def _df_hasCols(self, vertices, vcols = []):
+        map(lambda c: self.assertIn(c, vertices.columns), vcols)
 
     def _graph(self, name, *args):
         """
-        Convenience to call one of the example graphs, passing the arguments and wrapping the result as a Python object.
-        :param name: the name of the example graph.
-        :param args: all the required arguments (excluding the initial SparkSession).
-        :return: a GraphFrame object.
+        Convenience to call one of the example graphs, passing the arguments and wrapping the result back
+        as a python object.
+        :param name: the name of the example graph
+        :param args: all the required arguments, without the initial spark session
+        :return:
         """
         examples = self.japi.examples()
         jgraph = getattr(examples, name)(*args)
@@ -322,79 +281,83 @@ def test_aggregate_messages(self):
         g = self._graph("friends")
         # For each user, sum the ages of the adjacent users,
         # plus 1 for the src's sum if the edge is "friend".
-        sendToSrc = AM.dst["age"] + F.when(AM.edge["relationship"] == "friend", F.lit(1)).otherwise(
-            0
-        )
-        sendToDst = AM.src["age"]
+        sendToSrc = (
+            AM.dst['age'] +
+            sqlfunctions.when(
+                AM.edge['relationship'] == 'friend',
+                sqlfunctions.lit(1)
+            ).otherwise(0))
+        sendToDst = AM.src['age']
         agg = g.aggregateMessages(
-            F.sum(AM.msg).alias("summedAges"), sendToSrc=sendToSrc, sendToDst=sendToDst
-        )
-        # Run the aggregation again using SQL expressions as Strings.
+            sqlfunctions.sum(AM.msg).alias('summedAges'),
+            sendToSrc=sendToSrc,
+            sendToDst=sendToDst)
+        # Run the aggregation again providing SQL expressions as String instead.
         agg2 = g.aggregateMessages(
             "sum(MSG) AS `summedAges`",
             sendToSrc="(dst['age'] + CASE WHEN (edge['relationship'] = 'friend') THEN 1 ELSE 0 END)",
-            sendToDst="src['age']",
-        )
-        # Build mappings from id to the aggregated message.
-        aggMap = {row.id: row.summedAges for row in agg.select("id", "summedAges").collect()}
-        agg2Map = {row.id: row.summedAges for row in agg2.select("id", "summedAges").collect()}
-        # Compute the expected aggregation via brute force.
-        user2age = {row.id: row.age for row in g.vertices.select("id", "age").collect()}
+            sendToDst="src['age']")
+        # Convert agg and agg2 to a mapping from id to the aggregated message.
+        aggMap = {id_: s for id_, s in agg.select('id', 'summedAges').collect()}
+        agg2Map = {id_: s for id_, s in agg2.select('id', 'summedAges').collect()}
+        # Compute the truth via brute force.
+        user2age = {id_: age for id_, age in g.vertices.select('id', 'age').collect()}
         trueAgg = {}
-        for row in g.edges.select("src", "dst", "relationship").collect():
-            src, dst, rel = row.src, row.dst, row.relationship
-            trueAgg[src] = trueAgg.get(src, 0) + user2age[dst] + (1 if rel == "friend" else 0)
+        for src, dst, rel in g.edges.select("src", "dst", "relationship").collect():
+            trueAgg[src] = trueAgg.get(src, 0) + user2age[dst] + (1 if rel == 'friend' else 0)
             trueAgg[dst] = trueAgg.get(dst, 0) + user2age[src]
-        # Verify both aggregations match the expected results.
-        assert aggMap == trueAgg, f"aggMap {aggMap} does not equal expected {trueAgg}"
-        assert agg2Map == trueAgg, f"agg2Map {agg2Map} does not equal expected {trueAgg}"
-        # Check that passing a wrong type for messages raises a TypeError.
-        with pytest.raises(TypeError):
+        # Compare if the agg mappings match the brute force mapping
+        self.assertEqual(aggMap, trueAgg)
+        self.assertEqual(agg2Map, trueAgg)
+        # Check that TypeError is raises with messages of wrong type
+        with self.assertRaises(TypeError):
             g.aggregateMessages(
-                "sum(MSG) AS `summedAges`", sendToSrc=object(), sendToDst="src['age']"
-            )
-        with pytest.raises(TypeError):
+                "sum(MSG) AS `summedAges`",
+                sendToSrc=object(),
+                sendToDst="src['age']")
+        with self.assertRaises(TypeError):
             g.aggregateMessages(
-                "sum(MSG) AS `summedAges`", sendToSrc=F.col("dst")["age"], sendToDst=object()
-            )
+                "sum(MSG) AS `summedAges`",
+                sendToSrc=dst['age'],
+                sendToDst=object())
 
     def test_connected_components(self):
-        v = self.spark.createDataFrame([(0, "a", "b")], ["id", "vattr", "gender"])
+        v = self.spark.createDataFrame([
+        (0, "a", "b")], ["id", "vattr", "gender"])
         e = self.spark.createDataFrame([(0, 0, 1)], ["src", "dst", "test"]).filter("src > 10")
         g = GraphFrame(v, e)
         comps = g.connectedComponents()
-        self._df_hasCols(comps, vcols=["id", "component", "vattr", "gender"])
-        assert comps.count() == 1
+        self._df_hasCols(comps, vcols=['id', 'component', 'vattr', 'gender'])
+        self.assertEqual(comps.count(), 1)
 
     def test_connected_components2(self):
         v = self.spark.createDataFrame([(0, "a0", "b0"), (1, "a1", "b1")], ["id", "A", "B"])
         e = self.spark.createDataFrame([(0, 1, "a01", "b01")], ["src", "dst", "A", "B"])
         g = GraphFrame(v, e)
         comps = g.connectedComponents()
-        self._df_hasCols(comps, vcols=["id", "component", "A", "B"])
-        assert comps.count() == 2
+        self._df_hasCols(comps, vcols=['id', 'component', 'A', 'B'])
+        self.assertEqual(comps.count(), 2)
 
     def test_connected_components_friends(self):
         g = self._graph("friends")
-        comps_tests = [
-            g.connectedComponents(),
-            g.connectedComponents(broadcastThreshold=1),
-            g.connectedComponents(checkpointInterval=0),
-            g.connectedComponents(checkpointInterval=10),
-            g.connectedComponents(algorithm="graphx"),
-        ]
+        comps_tests = []
+        comps_tests += [g.connectedComponents()]
+        comps_tests += [g.connectedComponents(broadcastThreshold=1)]
+        comps_tests += [g.connectedComponents(checkpointInterval=0)]
+        comps_tests += [g.connectedComponents(checkpointInterval=10)]
+        comps_tests += [g.connectedComponents(algorithm="graphx")]
         for c in comps_tests:
-            assert c.groupBy("component").count().count() == 2
+            self.assertEqual(c.groupBy("component").count().count(), 2)
 
     def test_label_progagation(self):
         n = 5
         g = self._graph("twoBlobs", n)
         labels = g.labelPropagation(maxIter=4 * n)
         labels1 = labels.filter("id < 5").select("label").collect()
-        all1 = {row.label for row in labels1}
+        all1 = set([x.label for x in labels1])
         assert len(all1) == 1
         labels2 = labels.filter("id >= 5").select("label").collect()
-        all2 = {row.label for row in labels2}
+        all2 = set([x.label for x in labels2])
         assert len(all2) == 1
         assert all1 != all2
 
@@ -404,7 +367,7 @@ def test_page_rank(self):
         resetProb = 0.15
         errorTol = 1.0e-5
         pr = g.pageRank(resetProb, tol=errorTol)
-        self._hasCols(pr, vcols=["id", "pagerank"], ecols=["src", "dst", "weight"])
+        self._hasCols(pr, vcols=['id', 'pagerank'], ecols=['src', 'dst', 'weight'])
 
     def test_parallel_personalized_page_rank(self):
         n = 100
@@ -413,11 +376,10 @@ def test_parallel_personalized_page_rank(self):
         maxIter = 15
         sourceIds = [1, 2, 3, 4]
         pr = g.parallelPersonalizedPageRank(resetProb, sourceIds=sourceIds, maxIter=maxIter)
-        self._hasCols(pr, vcols=["id", "pageranks"], ecols=["src", "dst", "weight"])
+        self._hasCols(pr, vcols=['id', 'pageranks'], ecols=['src', 'dst', 'weight'])
 
     def test_shortest_paths(self):
         edges = [(1, 2), (1, 5), (2, 3), (2, 5), (3, 4), (4, 5), (4, 6)]
-        # Create bidirectional edges.
         all_edges = [z for (a, b) in edges for z in [(a, b), (b, a)]]
         edges = self.spark.createDataFrame(all_edges, ["src", "dst"])
         vertices = self.spark.createDataFrame([(i,) for i in range(1, 7)], ["id"])
@@ -428,19 +390,17 @@ def test_shortest_paths(self):
 
     def test_svd_plus_plus(self):
         g = self._graph("ALSSyntheticData")
-        v2, cost = g.svdPlusPlus()
-        self._df_hasCols(v2, vcols=["id", "column1", "column2", "column3", "column4"])
+        (v2, cost) = g.svdPlusPlus()
+        self._df_hasCols(v2, vcols=['id', 'column1', 'column2', 'column3', 'column4'])
 
     def test_strongly_connected_components(self):
-        # Simple island test.
+        # Simple island test
         vertices = self.spark.createDataFrame([(i,) for i in range(1, 6)], ["id"])
         edges = self.spark.createDataFrame([(7, 8)], ["src", "dst"])
         g = GraphFrame(vertices, edges)
         c = g.stronglyConnectedComponents(5)
         for row in c.collect():
-            assert (
-                row.id == row.component
-            ), f"Vertex {row.id} not equal to its component {row.component}"
+            self.assertEqual(row.id, row.component)
 
     def test_triangle_counts(self):
         edges = self.spark.createDataFrame([(0, 1), (1, 2), (2, 0)], ["src", "dst"])
@@ -448,66 +408,61 @@ def test_triangle_counts(self):
         g = GraphFrame(vertices, edges)
         c = g.triangleCount()
         for row in c.select("id", "count").collect():
-            assert row.asDict()["count"] == 1, f"Triangle count for vertex {row.id} is not 1"
-
+            self.assertEqual(row.asDict()['count'], 1)
+            
     def test_mutithreaded_sparksession_usage(self):
-        # Test that the GraphFrame API works correctly from multiple threads.
+        # Test that we can use the GraphFrame API from multiple threads
         localVertices = [(1, "A"), (2, "B"), (3, "C")]
         localEdges = [(1, 2, "love"), (2, 1, "hate"), (2, 3, "follow")]
         v = self.spark.createDataFrame(localVertices, ["id", "name"])
         e = self.spark.createDataFrame(localEdges, ["src", "dst", "action"])
-
+        
+        
         exc = None
-
         def run_graphframe() -> None:
-            nonlocal exc
             try:
                 GraphFrame(v, e)
             except Exception as _e:
+                nonlocal exc
                 exc = _e
-
+        
         import threading
-
         thread = threading.Thread(target=run_graphframe)
         thread.start()
         thread.join()
-        assert exc is None, f"Exception was raised in thread: {exc}"
-
+        self.assertIsNone(exc, f"Exception was raised in thread: {exc}")
 
-@pytest.mark.usefixtures("set_spark")
-class TestGraphFrameExamples:
 
-    def setup_method(self, method):
-        # Set up the Java API instance for use in the tests.
+class GraphFrameExamplesTest(GraphFrameTestCase):
+    def setUp(self):
+        super(GraphFrameExamplesTest, self).setUp()
         self.japi = _java_api(self.spark._sc)
 
     def test_belief_propagation(self):
-        # Create a graphical model g of size 3x3.
+        # create graphical model g of size 3 x 3
         g = Graphs(self.spark).gridIsingModel(3)
-        # Run Belief Propagation (BP) for 5 iterations.
+        # run BP for 5 iterations
         numIter = 5
         results = BeliefPropagation.runBPwithGraphFrames(g, numIter)
-        # Check that each belief is a valid probability in [0, 1].
-        for row in results.vertices.select("belief").collect():
-            belief = row["belief"]
-            assert (
-                0 <= belief <= 1
-            ), f"Expected belief to be probability in [0,1], but found {belief}"
+        # check beliefs are valid
+        for row in results.vertices.select('belief').collect():
+            belief = row['belief']
+            self.assertTrue(
+                0 <= belief <= 1,
+                msg="Expected belief to be probability in [0,1], but found {}".format(belief))
 
     def test_graph_friends(self):
-        # Construct the graph.
+        # construct graph
         g = Graphs(self.spark).friends()
-        # Check that the result is an instance of GraphFrame.
-        assert isinstance(g, GraphFrame)
+        # check that a GraphFrame instance was returned
+        self.assertIsInstance(g, GraphFrame)
 
     def test_graph_grid_ising_model(self):
-        # Construct a grid Ising model graph.
+        # construct graph
         n = 3
         g = Graphs(self.spark).gridIsingModel(n)
-        # Collect the vertex ids.
-        ids = [v["id"] for v in g.vertices.collect()]
-        # Verify that every expected vertex id appears.
+        # check that all the vertices exist
+        ids = [v['id'] for v in g.vertices.collect()]
         for i in range(n):
             for j in range(n):
-                expected_id = f"{i},{j}"
-                assert expected_id in ids, f"Vertex {expected_id} not found in {ids}"
+                self.assertIn('{},{}'.format(i, j), ids)

From 18b5da033042e328c08062c03002fba1c7ab7a75 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 13:27:42 -0800
Subject: [PATCH 10/53] Back out version changes to make nose tests pass

---
 .github/workflows/python-ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index 36b6b97e7..157b328f1 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -7,8 +7,8 @@ jobs:
       matrix:
         include:
           - spark-version: 3.5.4
-            scala-version: 2.12.20
-            python-version: 3.11.11
+            scala-version: 2.12.18
+            python-version: 3.9.19
     runs-on: ubuntu-22.04
     env:
       # define Java options for both official sbt and sbt-extras

From 8eca097f11c75c82dd72b2d5de596c935192f400 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 13:30:24 -0800
Subject: [PATCH 11/53] Remove changes to requirements

---
 python/requirements.txt | 2 --
 1 file changed, 2 deletions(-)

diff --git a/python/requirements.txt b/python/requirements.txt
index fb73319f2..3db67f231 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -1,6 +1,4 @@
 # This file should list any python package dependencies.
 pyspark>=2.0.0
-click==8.1.8
 numpy>=1.7
 py7zr==0.22.0
-requests==2.32.3

From 277c06fe75fe288657f64778e78d9b0a9712a2ae Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 13:31:16 -0800
Subject: [PATCH 12/53] Put nose back in requirements.txt

---
 python/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/requirements.txt b/python/requirements.txt
index 3db67f231..9893b3cb1 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -1,4 +1,4 @@
 # This file should list any python package dependencies.
+nose==1.3.7
 pyspark>=2.0.0
 numpy>=1.7
-py7zr==0.22.0

From b55ee4881849a882717bc0035902a855817c7113 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 13:31:51 -0800
Subject: [PATCH 13/53] Remove version bump to version.sbt

---
 version.sbt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/version.sbt b/version.sbt
index 6fbb590a4..f72bdcc0e 100644
--- a/version.sbt
+++ b/version.sbt
@@ -1 +1 @@
-ThisBuild / version := "0.8.5"
+ThisBuild / version := "0.8.4"

From f8a8fd9ea062ad2e0504ddb7e7e22b70d5c7b013 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 13:39:29 -0800
Subject: [PATCH 14/53] Remove packages related to testing

---
 python/requirements-dev.txt | 2 --
 1 file changed, 2 deletions(-)

diff --git a/python/requirements-dev.txt b/python/requirements-dev.txt
index b27da4d73..6e596dc62 100644
--- a/python/requirements-dev.txt
+++ b/python/requirements-dev.txt
@@ -1,5 +1,3 @@
-pytest==8.3.4
-Sphinx==8.1.3
 flake8==7.1.1
 isort==6.0.0
 mypy==1.14.1

From bc2cb36e7f1012c10b12907ff3b6b284e9c9b6c3 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 14:15:36 -0800
Subject: [PATCH 15/53] Remove old setup.py / setup.cfg

---
 python/setup.cfg | 42 ------------------------------------------
 python/setup.py  | 35 -----------------------------------
 2 files changed, 77 deletions(-)
 delete mode 100644 python/setup.cfg
 delete mode 100644 python/setup.py

diff --git a/python/setup.cfg b/python/setup.cfg
deleted file mode 100644
index 02a0d5136..000000000
--- a/python/setup.cfg
+++ /dev/null
@@ -1,42 +0,0 @@
-[metadata]
-name = graphframes
-version = 0.8.5
-description = GraphFrames: Graph Processing Framework for Apache Spark
-long_description = file: ../README.md
-long_description_content_type = text/markdown
-author = GraphFrames Contributors
-author_email = graphframes@googlegroups.com
-url = https://pypi.org/project/graphframes-py/
-license = Apache License 2.0
-classifiers =
-    Development Status :: 4 - Beta
-    Programming Language :: Python :: 3
-    Operating System :: OS Independent
-
-[options]
-packages = find:
-package_dir =
-    = python
-include_package_data = True
-install_requires =
-    pyspark>=2.0.0
-    click==8.1.8
-    numpy>=1.7
-    py7zr==0.22.0
-    requests==2.32.3
-
-[options.packages.find]
-where = python
-    exclude =
-        tests.py
-        docs
-
-[options.extras_require]
-dev =
-    pytest==8.3.4
-    Sphinx==8.1.3
-    black==25.1.0
-    flake8==7.1.1
-    isort==6.0.0
-    mypy==1.14.1
-    pre-commit==3.5.1
diff --git a/python/setup.py b/python/setup.py
deleted file mode 100644
index a91fb629a..000000000
--- a/python/setup.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from setuptools import setup, find_packages  # type: ignore
-import os
-
-
-def parse_requirements(filename):
-    """Load requirements from a pip requirements file."""
-    with open(filename, encoding="utf-8") as f:
-        # Filter out comments and empty lines.
-        return [line.strip() for line in f if line.strip() and not line.startswith("#")]
-
-
-# Read the long description from the README file.
-here = os.path.abspath(os.path.dirname(__file__))
-
-# Use requirements.txt to get the list of dependencies.
-requirements = parse_requirements(os.path.join(here, "requirements.txt"))
-
-setup(
-    name="graphframes",
-    version=open("version.sbt").read().strip(),  # Update this version as needed
-    description="GraphFrames: Graph Processing Framework for Apache Spark",
-    long_description=open(os.path.join(f"{here}/..", "README.md"), encoding="utf-8").read(),
-    long_description_content_type="text/markdown",
-    author="GraphFrames Contributors",
-    author_email="graphframes@googlegroups.com",
-    url="https://pypi.org/project/graphframes-py",
-    packages=find_packages(where="python"),
-    package_dir={"": "python"},
-    include_package_data=True,  # Include non-code files specified in MANIFEST.in
-    install_requires=requirements,
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "Operating System :: OS Independent",
-    ],
-)

From 728be33b6dfd2ee7135711df752289966afdbc2a Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 14:15:46 -0800
Subject: [PATCH 16/53] New pyproject.toml and poetry.lock

---
 python/poetry.lock    | 360 ++++++++++++++++++++++++++++++++++++++++++
 python/pyproject.toml |  38 +++++
 2 files changed, 398 insertions(+)
 create mode 100644 python/poetry.lock
 create mode 100644 python/pyproject.toml

diff --git a/python/poetry.lock b/python/poetry.lock
new file mode 100644
index 000000000..6eb61618d
--- /dev/null
+++ b/python/poetry.lock
@@ -0,0 +1,360 @@
+# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand.
+
+[[package]]
+name = "black"
+version = "25.1.0"
+description = "The uncompromising code formatter."
+optional = false
+python-versions = ">=3.9"
+groups = ["dev"]
+files = [
+    {file = "black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32"},
+    {file = "black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da"},
+    {file = "black-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7"},
+    {file = "black-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9"},
+    {file = "black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0"},
+    {file = "black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299"},
+    {file = "black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096"},
+    {file = "black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2"},
+    {file = "black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b"},
+    {file = "black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc"},
+    {file = "black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f"},
+    {file = "black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba"},
+    {file = "black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f"},
+    {file = "black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3"},
+    {file = "black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171"},
+    {file = "black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18"},
+    {file = "black-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0"},
+    {file = "black-25.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f"},
+    {file = "black-25.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e"},
+    {file = "black-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355"},
+    {file = "black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717"},
+    {file = "black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666"},
+]
+
+[package.dependencies]
+click = ">=8.0.0"
+mypy-extensions = ">=0.4.3"
+packaging = ">=22.0"
+pathspec = ">=0.9.0"
+platformdirs = ">=2"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""}
+
+[package.extras]
+colorama = ["colorama (>=0.4.3)"]
+d = ["aiohttp (>=3.10)"]
+jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
+uvloop = ["uvloop (>=0.15.2)"]
+
+[[package]]
+name = "click"
+version = "8.1.8"
+description = "Composable command line interface toolkit"
+optional = false
+python-versions = ">=3.7"
+groups = ["dev"]
+files = [
+    {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"},
+    {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+description = "Cross-platform colored terminal text."
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+groups = ["dev"]
+markers = "platform_system == \"Windows\""
+files = [
+    {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
+    {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
+]
+
+[[package]]
+name = "flake8"
+version = "7.1.2"
+description = "the modular source code checker: pep8 pyflakes and co"
+optional = false
+python-versions = ">=3.8.1"
+groups = ["dev"]
+files = [
+    {file = "flake8-7.1.2-py2.py3-none-any.whl", hash = "sha256:1cbc62e65536f65e6d754dfe6f1bada7f5cf392d6f5db3c2b85892466c3e7c1a"},
+    {file = "flake8-7.1.2.tar.gz", hash = "sha256:c586ffd0b41540951ae41af572e6790dbd49fc12b3aa2541685d253d9bd504bd"},
+]
+
+[package.dependencies]
+mccabe = ">=0.7.0,<0.8.0"
+pycodestyle = ">=2.12.0,<2.13.0"
+pyflakes = ">=3.2.0,<3.3.0"
+
+[[package]]
+name = "isort"
+version = "6.0.0"
+description = "A Python utility / library to sort Python imports."
+optional = false
+python-versions = ">=3.9.0"
+groups = ["dev"]
+files = [
+    {file = "isort-6.0.0-py3-none-any.whl", hash = "sha256:567954102bb47bb12e0fae62606570faacddd441e45683968c8d1734fb1af892"},
+    {file = "isort-6.0.0.tar.gz", hash = "sha256:75d9d8a1438a9432a7d7b54f2d3b45cad9a4a0fdba43617d9873379704a8bdf1"},
+]
+
+[package.extras]
+colors = ["colorama"]
+plugins = ["setuptools"]
+
+[[package]]
+name = "mccabe"
+version = "0.7.0"
+description = "McCabe checker, plugin for flake8"
+optional = false
+python-versions = ">=3.6"
+groups = ["dev"]
+files = [
+    {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"},
+    {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
+]
+
+[[package]]
+name = "mypy-extensions"
+version = "1.0.0"
+description = "Type system extensions for programs checked with the mypy type checker."
+optional = false
+python-versions = ">=3.5"
+groups = ["dev"]
+files = [
+    {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
+    {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
+]
+
+[[package]]
+name = "nose"
+version = "1.3.7"
+description = "nose extends unittest to make testing easier"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "nose-1.3.7-py2-none-any.whl", hash = "sha256:dadcddc0aefbf99eea214e0f1232b94f2fa9bd98fa8353711dacb112bfcbbb2a"},
+    {file = "nose-1.3.7-py3-none-any.whl", hash = "sha256:9ff7c6cc443f8c51994b34a667bbcf45afd6d945be7477b52e97516fd17c53ac"},
+    {file = "nose-1.3.7.tar.gz", hash = "sha256:f1bffef9cbc82628f6e7d7b40d7e255aefaa1adb6a1b1d26c69a8b79e6208a98"},
+]
+
+[[package]]
+name = "numpy"
+version = "2.0.2"
+description = "Fundamental package for array computing in Python"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece"},
+    {file = "numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04"},
+    {file = "numpy-2.0.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8c5713284ce4e282544c68d1c3b2c7161d38c256d2eefc93c1d683cf47683e66"},
+    {file = "numpy-2.0.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:becfae3ddd30736fe1889a37f1f580e245ba79a5855bff5f2a29cb3ccc22dd7b"},
+    {file = "numpy-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2da5960c3cf0df7eafefd806d4e612c5e19358de82cb3c343631188991566ccd"},
+    {file = "numpy-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:496f71341824ed9f3d2fd36cf3ac57ae2e0165c143b55c3a035ee219413f3318"},
+    {file = "numpy-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a61ec659f68ae254e4d237816e33171497e978140353c0c2038d46e63282d0c8"},
+    {file = "numpy-2.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d731a1c6116ba289c1e9ee714b08a8ff882944d4ad631fd411106a30f083c326"},
+    {file = "numpy-2.0.2-cp310-cp310-win32.whl", hash = "sha256:984d96121c9f9616cd33fbd0618b7f08e0cfc9600a7ee1d6fd9b239186d19d97"},
+    {file = "numpy-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:c7b0be4ef08607dd04da4092faee0b86607f111d5ae68036f16cc787e250a131"},
+    {file = "numpy-2.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:49ca4decb342d66018b01932139c0961a8f9ddc7589611158cb3c27cbcf76448"},
+    {file = "numpy-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:11a76c372d1d37437857280aa142086476136a8c0f373b2e648ab2c8f18fb195"},
+    {file = "numpy-2.0.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:807ec44583fd708a21d4a11d94aedf2f4f3c3719035c76a2bbe1fe8e217bdc57"},
+    {file = "numpy-2.0.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8cafab480740e22f8d833acefed5cc87ce276f4ece12fdaa2e8903db2f82897a"},
+    {file = "numpy-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15f476a45e6e5a3a79d8a14e62161d27ad897381fecfa4a09ed5322f2085669"},
+    {file = "numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13e689d772146140a252c3a28501da66dfecd77490b498b168b501835041f951"},
+    {file = "numpy-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9ea91dfb7c3d1c56a0e55657c0afb38cf1eeae4544c208dc465c3c9f3a7c09f9"},
+    {file = "numpy-2.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c1c9307701fec8f3f7a1e6711f9089c06e6284b3afbbcd259f7791282d660a15"},
+    {file = "numpy-2.0.2-cp311-cp311-win32.whl", hash = "sha256:a392a68bd329eafac5817e5aefeb39038c48b671afd242710b451e76090e81f4"},
+    {file = "numpy-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:286cd40ce2b7d652a6f22efdfc6d1edf879440e53e76a75955bc0c826c7e64dc"},
+    {file = "numpy-2.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:df55d490dea7934f330006d0f81e8551ba6010a5bf035a249ef61a94f21c500b"},
+    {file = "numpy-2.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8df823f570d9adf0978347d1f926b2a867d5608f434a7cff7f7908c6570dcf5e"},
+    {file = "numpy-2.0.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9a92ae5c14811e390f3767053ff54eaee3bf84576d99a2456391401323f4ec2c"},
+    {file = "numpy-2.0.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:a842d573724391493a97a62ebbb8e731f8a5dcc5d285dfc99141ca15a3302d0c"},
+    {file = "numpy-2.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05e238064fc0610c840d1cf6a13bf63d7e391717d247f1bf0318172e759e692"},
+    {file = "numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0123ffdaa88fa4ab64835dcbde75dcdf89c453c922f18dced6e27c90d1d0ec5a"},
+    {file = "numpy-2.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:96a55f64139912d61de9137f11bf39a55ec8faec288c75a54f93dfd39f7eb40c"},
+    {file = "numpy-2.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ec9852fb39354b5a45a80bdab5ac02dd02b15f44b3804e9f00c556bf24b4bded"},
+    {file = "numpy-2.0.2-cp312-cp312-win32.whl", hash = "sha256:671bec6496f83202ed2d3c8fdc486a8fc86942f2e69ff0e986140339a63bcbe5"},
+    {file = "numpy-2.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:cfd41e13fdc257aa5778496b8caa5e856dc4896d4ccf01841daee1d96465467a"},
+    {file = "numpy-2.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9059e10581ce4093f735ed23f3b9d283b9d517ff46009ddd485f1747eb22653c"},
+    {file = "numpy-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:423e89b23490805d2a5a96fe40ec507407b8ee786d66f7328be214f9679df6dd"},
+    {file = "numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:2b2955fa6f11907cf7a70dab0d0755159bca87755e831e47932367fc8f2f2d0b"},
+    {file = "numpy-2.0.2-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:97032a27bd9d8988b9a97a8c4d2c9f2c15a81f61e2f21404d7e8ef00cb5be729"},
+    {file = "numpy-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e795a8be3ddbac43274f18588329c72939870a16cae810c2b73461c40718ab1"},
+    {file = "numpy-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b258c385842546006213344c50655ff1555a9338e2e5e02a0756dc3e803dd"},
+    {file = "numpy-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fec9451a7789926bcf7c2b8d187292c9f93ea30284802a0ab3f5be8ab36865d"},
+    {file = "numpy-2.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9189427407d88ff25ecf8f12469d4d39d35bee1db5d39fc5c168c6f088a6956d"},
+    {file = "numpy-2.0.2-cp39-cp39-win32.whl", hash = "sha256:905d16e0c60200656500c95b6b8dca5d109e23cb24abc701d41c02d74c6b3afa"},
+    {file = "numpy-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:a3f4ab0caa7f053f6797fcd4e1e25caee367db3112ef2b6ef82d749530768c73"},
+    {file = "numpy-2.0.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7f0a0c6f12e07fa94133c8a67404322845220c06a9e80e85999afe727f7438b8"},
+    {file = "numpy-2.0.2-pp39-pypy39_pp73-macosx_14_0_x86_64.whl", hash = "sha256:312950fdd060354350ed123c0e25a71327d3711584beaef30cdaa93320c392d4"},
+    {file = "numpy-2.0.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26df23238872200f63518dd2aa984cfca675d82469535dc7162dc2ee52d9dd5c"},
+    {file = "numpy-2.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a46288ec55ebbd58947d31d72be2c63cbf839f0a63b49cb755022310792a3385"},
+    {file = "numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78"},
+]
+
+[[package]]
+name = "packaging"
+version = "24.2"
+description = "Core utilities for Python packages"
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
+    {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
+]
+
+[[package]]
+name = "pathspec"
+version = "0.12.1"
+description = "Utility library for gitignore style pattern matching of file paths."
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"},
+    {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
+]
+
+[[package]]
+name = "platformdirs"
+version = "4.3.6"
+description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"},
+    {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"},
+]
+
+[package.extras]
+docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"]
+test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"]
+type = ["mypy (>=1.11.2)"]
+
+[[package]]
+name = "py4j"
+version = "0.10.9.7"
+description = "Enables Python programs to dynamically access arbitrary Java objects"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "py4j-0.10.9.7-py2.py3-none-any.whl", hash = "sha256:85defdfd2b2376eb3abf5ca6474b51ab7e0de341c75a02f46dc9b5976f5a5c1b"},
+    {file = "py4j-0.10.9.7.tar.gz", hash = "sha256:0b6e5315bb3ada5cf62ac651d107bb2ebc02def3dee9d9548e3baac644ea8dbb"},
+]
+
+[[package]]
+name = "pycodestyle"
+version = "2.12.1"
+description = "Python style guide checker"
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "pycodestyle-2.12.1-py2.py3-none-any.whl", hash = "sha256:46f0fb92069a7c28ab7bb558f05bfc0110dac69a0cd23c61ea0040283a9d78b3"},
+    {file = "pycodestyle-2.12.1.tar.gz", hash = "sha256:6838eae08bbce4f6accd5d5572075c63626a15ee3e6f842df996bf62f6d73521"},
+]
+
+[[package]]
+name = "pyflakes"
+version = "3.2.0"
+description = "passive checker of Python programs"
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "pyflakes-3.2.0-py2.py3-none-any.whl", hash = "sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a"},
+    {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"},
+]
+
+[[package]]
+name = "pyspark"
+version = "3.5.4"
+description = "Apache Spark Python API"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "pyspark-3.5.4.tar.gz", hash = "sha256:1c2926d63020902163f58222466adf6f8016f6c43c1f319b8e7a71dbaa05fc51"},
+]
+
+[package.dependencies]
+py4j = "0.10.9.7"
+
+[package.extras]
+connect = ["googleapis-common-protos (>=1.56.4)", "grpcio (>=1.56.0)", "grpcio-status (>=1.56.0)", "numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
+ml = ["numpy (>=1.15,<2)"]
+mllib = ["numpy (>=1.15,<2)"]
+pandas-on-spark = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
+sql = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
+
+[[package]]
+name = "tomli"
+version = "2.2.1"
+description = "A lil' TOML parser"
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+markers = "python_version < \"3.11\""
+files = [
+    {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
+    {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
+    {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"},
+    {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"},
+    {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"},
+    {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"},
+    {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"},
+    {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"},
+    {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"},
+    {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"},
+    {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"},
+    {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"},
+    {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"},
+    {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"},
+    {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"},
+    {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"},
+    {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"},
+    {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"},
+    {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"},
+    {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"},
+    {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"},
+    {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"},
+    {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"},
+    {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"},
+    {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"},
+    {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"},
+    {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"},
+    {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"},
+    {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"},
+    {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"},
+    {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"},
+    {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"},
+]
+
+[[package]]
+name = "typing-extensions"
+version = "4.12.2"
+description = "Backported and Experimental Type Hints for Python 3.8+"
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+markers = "python_version < \"3.11\""
+files = [
+    {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
+    {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
+]
+
+[metadata]
+lock-version = "2.1"
+python-versions = ">=3.9 <3.13"
+content-hash = "430f562a040c0eabc2fe5c93757801dd9d7ed4c5173be37c7fc3808e04668ccd"
diff --git a/python/pyproject.toml b/python/pyproject.toml
new file mode 100644
index 000000000..076cc3ce4
--- /dev/null
+++ b/python/pyproject.toml
@@ -0,0 +1,38 @@
+[tool.poetry]
+name = "graphframes-py"
+version = "0.8.4"
+description = "GraphFrames: Graph Processing Framework for Apache Spark"
+authors = ["GraphFrames Contributors <graphframes@googlegroups.com>"]
+license = "Apache 2.0"
+readme = "../README.md"
+packages = [{include = "graphframes"}]
+
+[tool.poetry.urls]
+"Project Homepage" = "https://graphframes.github.io/graphframes"
+"PyPi Homepage" = "https://pypi.org/project/graphframes-py"
+"Code Repository" = "https://github.com/graphframes/graphframes"
+"Bug Tracker" = "https://github.com/graphframes/graphframes/issues"
+
+[tool.poetry.dependencies]
+python = ">=3.9 <3.13"
+nose = "1.3.7"
+pyspark = ">= 2.0.0"
+numpy = ">= 1.7"
+
+[tool.poetry.group.dev.dependencies]
+black = "^25.1.0"
+flake8 = "^7.1.1"
+isort = "^6.0.0"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.black]
+line-length = 100
+target-version = ["py39"]
+include = ["graphframes", "test"]
+
+[tool.isort]
+profile = "black"
+src_paths = ["graphframes", "test"]

From 3cea1a88e85b54662a9126dc56ab52c625ca7b3a Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 14:24:58 -0800
Subject: [PATCH 17/53] Short README for Python package, poetry won't allow a
 ../README.md path

---
 python/README.md | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 python/README.md

diff --git a/python/README.md b/python/README.md
new file mode 100644
index 000000000..0ab7cd4ba
--- /dev/null
+++ b/python/README.md
@@ -0,0 +1,17 @@
+# GraphFrames `graphframes-py` Python Package
+
+This is the official [graphframes-py PyPI package](https://pypi.org/project/graphframes-py/), which is a Python wrapper for the Scala GraphFrames library. This package is maintained by the GraphFrames project and is available on PyPI.
+
+For instructions on GraphFrames, check the project [../README.md](../README.md). See [Installation and Quick-Start](../README.md#installation-and-quick-start) for the best way to install and use GraphFrames.
+
+## Running `graphframes-py`
+
+You should use GraphFrames via the `--packages` argument to `pyspark` or `spark-submit`, but this package is helpful in development environments.
+
+```bash
+# Interactive Python
+$ pyspark --packages graphframes:graphframes:0.8.4-spark3.5-s_2.12
+
+# Submit a script in Scala/Java/Python
+$ spark-submit --packages graphframes:graphframes:0.8.4-spark3.5-s_2.12 script.py
+```

From 87cc97514c4aa7e1d76b5a3bb80fd5ee4e2abf50 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 14:25:41 -0800
Subject: [PATCH 18/53] Remove requirements files in favor of pyproject.toml

---
 python/requirements-dev.txt | 4 ----
 python/requirements.txt     | 4 ----
 2 files changed, 8 deletions(-)
 delete mode 100644 python/requirements-dev.txt
 delete mode 100644 python/requirements.txt

diff --git a/python/requirements-dev.txt b/python/requirements-dev.txt
deleted file mode 100644
index 6e596dc62..000000000
--- a/python/requirements-dev.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-flake8==7.1.1
-isort==6.0.0
-mypy==1.14.1
-pre-commit==4.0.1
diff --git a/python/requirements.txt b/python/requirements.txt
deleted file mode 100644
index 9893b3cb1..000000000
--- a/python/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-# This file should list any python package dependencies.
-nose==1.3.7
-pyspark>=2.0.0
-numpy>=1.7

From 6f84a5a634bcdf731c469644a3509074c3ce58d7 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 14:33:18 -0800
Subject: [PATCH 19/53] Try to poetrize CI build

---
 .github/workflows/python-ci.yml | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index 157b328f1..47a484c1e 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -31,15 +31,13 @@ jobs:
     - uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
-    - name: Install python dependencies
+    - name: Build Python package and its dependencies
+      working-directory: ./python
       run: |
-        python -m pip install --upgrade pip wheel
-        pip install -r ./python/requirements.txt
-        pip install -r ./python/requirements-dev.txt
-        pip install pyspark==${{ matrix.spark-version }}
+        python -m pip install --upgrade poetry
+        poetry build
+        poetry install
     - name: Test
       run: |
-        python python/setup.py install
-        python python/setup.py bdist_wheel
         export SPARK_HOME=$(python -c "import os; from importlib.util import find_spec; print(os.path.join(os.path.dirname(find_spec('pyspark').origin)))")
         ./python/run-tests.sh

From 9a8eef0d29e2d36129427ae9efa13fc8bb044021 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 15:01:09 -0800
Subject: [PATCH 20/53] pyspark min 3.4

---
 python/poetry.lock    | 2 +-
 python/pyproject.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/poetry.lock b/python/poetry.lock
index 6eb61618d..0fb5fb139 100644
--- a/python/poetry.lock
+++ b/python/poetry.lock
@@ -357,4 +357,4 @@ files = [
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.9 <3.13"
-content-hash = "430f562a040c0eabc2fe5c93757801dd9d7ed4c5173be37c7fc3808e04668ccd"
+content-hash = "52c129fee3e94e69edf727f219bc7582ddbfcedf6c43547a7f67a876051bd7c4"
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 076cc3ce4..0cff88d08 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -16,7 +16,7 @@ packages = [{include = "graphframes"}]
 [tool.poetry.dependencies]
 python = ">=3.9 <3.13"
 nose = "1.3.7"
-pyspark = ">= 2.0.0"
+pyspark = "^3.4"
 numpy = ">= 1.7"
 
 [tool.poetry.group.dev.dependencies]

From 75ecd997d2cfbf0e52799b86b3f9f261e63e375e Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 15:02:53 -0800
Subject: [PATCH 21/53] Local python README in pyproject.toml

---
 python/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyproject.toml b/python/pyproject.toml
index 0cff88d08..84050dcc2 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -4,7 +4,7 @@ version = "0.8.4"
 description = "GraphFrames: Graph Processing Framework for Apache Spark"
 authors = ["GraphFrames Contributors <graphframes@googlegroups.com>"]
 license = "Apache 2.0"
-readme = "../README.md"
+readme = "README.md"
 packages = [{include = "graphframes"}]
 
 [tool.poetry.urls]

From 80231d0e2262eb1044a619dbd2792f6cdcc41d35 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 15:23:20 -0800
Subject: [PATCH 22/53] Trying to remove the working folder to debug scala issue

---
 .github/workflows/python-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index 47a484c1e..3af7339b0 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -32,7 +32,7 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}
     - name: Build Python package and its dependencies
-      working-directory: ./python
+      # working-directory: ./python
       run: |
         python -m pip install --upgrade poetry
         poetry build

From 2a9170baad6e6d2791f258841b7db54cecec251d Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 15:50:44 -0800
Subject: [PATCH 23/53] Set Python working directory again

---
 .github/workflows/python-ci.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index 3af7339b0..f863785b0 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -10,6 +10,7 @@ jobs:
             scala-version: 2.12.18
             python-version: 3.9.19
     runs-on: ubuntu-22.04
+    
     env:
       # define Java options for both official sbt and sbt-extras
       JAVA_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M -Dfile.encoding=UTF-8
@@ -32,7 +33,7 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}
     - name: Build Python package and its dependencies
-      # working-directory: ./python
+      working-directory: ./python
       run: |
         python -m pip install --upgrade poetry
         poetry build

From 3de22636760c3059361efd5c6135c99236c88949 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 15:51:43 -0800
Subject: [PATCH 24/53] Accidental newline

---
 .github/workflows/python-ci.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index f863785b0..47a484c1e 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -10,7 +10,6 @@ jobs:
             scala-version: 2.12.18
             python-version: 3.9.19
     runs-on: ubuntu-22.04
-    
     env:
       # define Java options for both official sbt and sbt-extras
       JAVA_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M -Dfile.encoding=UTF-8

From 4662717935fd3629a237d1ab454ba6fc6b42327f Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 16:10:08 -0800
Subject: [PATCH 25/53] Install Python for test...

---
 .github/workflows/python-ci.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index 47a484c1e..519c5fb4a 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -39,5 +39,8 @@ jobs:
         poetry install
     - name: Test
       run: |
+        python -m pip install --upgrade poetry
+        poetry build
+        poetry install
         export SPARK_HOME=$(python -c "import os; from importlib.util import find_spec; print(os.path.join(os.path.dirname(find_spec('pyspark').origin)))")
         ./python/run-tests.sh

From 1b7b9f83a82cb120cd831cbeb38a71065d9030fd Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 16:19:15 -0800
Subject: [PATCH 26/53] Run tests from python/ folder

---
 .github/workflows/python-ci.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index 519c5fb4a..72ffe6e22 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -38,9 +38,7 @@ jobs:
         poetry build
         poetry install
     - name: Test
+      working-directory: ./python
       run: |
-        python -m pip install --upgrade poetry
-        poetry build
-        poetry install
         export SPARK_HOME=$(python -c "import os; from importlib.util import find_spec; print(os.path.join(os.path.dirname(find_spec('pyspark').origin)))")
         ./python/run-tests.sh

From 58da4932cb3997b797a8ec9f98e6bd95e49f543e Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 16:37:58 -0800
Subject: [PATCH 27/53] Try running tests from python/

---
 .github/workflows/python-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index 72ffe6e22..2e3e44311 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -41,4 +41,4 @@ jobs:
       working-directory: ./python
       run: |
         export SPARK_HOME=$(python -c "import os; from importlib.util import find_spec; print(os.path.join(os.path.dirname(find_spec('pyspark').origin)))")
-        ./python/run-tests.sh
+        ./run-tests.sh

From 9f4aa24e6d77ccb45c42b4ea8bf02b1905e826a1 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 16:45:10 -0800
Subject: [PATCH 28/53] poetry run the unit tests

---
 .github/workflows/python-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index 2e3e44311..3d939db65 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -40,5 +40,5 @@ jobs:
     - name: Test
       working-directory: ./python
       run: |
-        export SPARK_HOME=$(python -c "import os; from importlib.util import find_spec; print(os.path.join(os.path.dirname(find_spec('pyspark').origin)))")
+        export SPARK_HOME=$(poetry run python -c "import os; from importlib.util import find_spec; spec = find_spec('pyspark'); print(os.path.join(os.path.dirname(spec.origin)))")
         ./run-tests.sh

From 11b2782e519a518287b62b8e6969bc7b2f2f0947 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 16:49:18 -0800
Subject: [PATCH 29/53] poetry run the tests

---
 python/run-tests.sh | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/python/run-tests.sh b/python/run-tests.sh
index af4e0a139..4c558dfd3 100755
--- a/python/run-tests.sh
+++ b/python/run-tests.sh
@@ -38,7 +38,7 @@ echo $pyver
 
 LIBS=""
 for lib in "$SPARK_HOME/python/lib"/*zip ; do
-  LIBS=$LIBS:$lib
+    LIBS=$LIBS:$lib
 done
 
 # The current directory of the script.
@@ -51,7 +51,7 @@ assembly_path="$DIR/../target/scala-$scala_version_major_minor"
 echo `ls $assembly_path/graphframes-assembly*.jar`
 JAR_PATH=""
 for assembly in $assembly_path/graphframes-assembly*.jar ; do
-  JAR_PATH=$assembly
+    JAR_PATH=$assembly
 done
 
 export PYSPARK_SUBMIT_ARGS="--driver-memory 2g --executor-memory 2g --jars $JAR_PATH pyspark-shell "
@@ -64,14 +64,14 @@ export PYTHONPATH=$PYTHONPATH:graphframes
 # Run test suites
 
 if [[ "$python_major" == "2" ]]; then
-
-  # Horrible hack for spark 1.x: we manually remove some log lines to stay below the 4MB log limit on Travis.
-  $PYSPARK_DRIVER_PYTHON `which nosetests` -v --all-modules -w $DIR 2>&1 | grep -vE "INFO (ParquetOutputFormat|SparkContext|ContextCleaner|ShuffleBlockFetcherIterator|MapOutputTrackerMaster|TaskSetManager|Executor|MemoryStore|CacheManager|BlockManager|DAGScheduler|PythonRDD|TaskSchedulerImpl|ZippedPartitionsRDD2)";
-
+    
+    # Horrible hack for spark 1.x: we manually remove some log lines to stay below the 4MB log limit on Travis.
+    poetry run $PYSPARK_DRIVER_PYTHON `which nosetests` -v --all-modules -w $DIR 2>&1 | grep -vE "INFO (ParquetOutputFormat|SparkContext|ContextCleaner|ShuffleBlockFetcherIterator|MapOutputTrackerMaster|TaskSetManager|Executor|MemoryStore|CacheManager|BlockManager|DAGScheduler|PythonRDD|TaskSchedulerImpl|ZippedPartitionsRDD2)";
+    
 else
-
-  $PYSPARK_DRIVER_PYTHON -m "nose" -v --all-modules -w $DIR 2>&1 | grep -vE "INFO (ParquetOutputFormat|SparkContext|ContextCleaner|ShuffleBlockFetcherIterator|MapOutputTrackerMaster|TaskSetManager|Executor|MemoryStore|CacheManager|BlockManager|DAGScheduler|PythonRDD|TaskSchedulerImpl|ZippedPartitionsRDD2)";
-
+    
+    poetry run $PYSPARK_DRIVER_PYTHON -m "nose" -v --all-modules -w $DIR 2>&1 | grep -vE "INFO (ParquetOutputFormat|SparkContext|ContextCleaner|ShuffleBlockFetcherIterator|MapOutputTrackerMaster|TaskSetManager|Executor|MemoryStore|CacheManager|BlockManager|DAGScheduler|PythonRDD|TaskSchedulerImpl|ZippedPartitionsRDD2)";
+    
 fi
 
 # Exit immediately if the tests fail.

From 9772344b96fb353a3f7ad17f9198a28ee0aef568 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 16:52:30 -0800
Subject: [PATCH 30/53] Try just using 'python' instead of a path

---
 python/run-tests.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/run-tests.sh b/python/run-tests.sh
index 4c558dfd3..6527a60a7 100755
--- a/python/run-tests.sh
+++ b/python/run-tests.sh
@@ -66,11 +66,11 @@ export PYTHONPATH=$PYTHONPATH:graphframes
 if [[ "$python_major" == "2" ]]; then
     
     # Horrible hack for spark 1.x: we manually remove some log lines to stay below the 4MB log limit on Travis.
-    poetry run $PYSPARK_DRIVER_PYTHON `which nosetests` -v --all-modules -w $DIR 2>&1 | grep -vE "INFO (ParquetOutputFormat|SparkContext|ContextCleaner|ShuffleBlockFetcherIterator|MapOutputTrackerMaster|TaskSetManager|Executor|MemoryStore|CacheManager|BlockManager|DAGScheduler|PythonRDD|TaskSchedulerImpl|ZippedPartitionsRDD2)";
+    poetry run python `which nosetests` -v --all-modules -w $DIR 2>&1 | grep -vE "INFO (ParquetOutputFormat|SparkContext|ContextCleaner|ShuffleBlockFetcherIterator|MapOutputTrackerMaster|TaskSetManager|Executor|MemoryStore|CacheManager|BlockManager|DAGScheduler|PythonRDD|TaskSchedulerImpl|ZippedPartitionsRDD2)";
     
 else
     
-    poetry run $PYSPARK_DRIVER_PYTHON -m "nose" -v --all-modules -w $DIR 2>&1 | grep -vE "INFO (ParquetOutputFormat|SparkContext|ContextCleaner|ShuffleBlockFetcherIterator|MapOutputTrackerMaster|TaskSetManager|Executor|MemoryStore|CacheManager|BlockManager|DAGScheduler|PythonRDD|TaskSchedulerImpl|ZippedPartitionsRDD2)";
+    poetry run python -m "nose" -v --all-modules -w $DIR 2>&1 | grep -vE "INFO (ParquetOutputFormat|SparkContext|ContextCleaner|ShuffleBlockFetcherIterator|MapOutputTrackerMaster|TaskSetManager|Executor|MemoryStore|CacheManager|BlockManager|DAGScheduler|PythonRDD|TaskSchedulerImpl|ZippedPartitionsRDD2)";
     
 fi
 

From d55dbfe4815c2f0b0870cdc53b65fb9d9a075b42 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 16:58:08 -0800
Subject: [PATCH 31/53] poetry run the last line, graphframes.main

---
 python/run-tests.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/run-tests.sh b/python/run-tests.sh
index 6527a60a7..0382efbd0 100755
--- a/python/run-tests.sh
+++ b/python/run-tests.sh
@@ -83,4 +83,4 @@ test ${PIPESTATUS[0]} -eq 0 || exit 1;
 
 cd "$DIR"
 
-$PYSPARK_PYTHON -u ./graphframes/graphframe.py "$@"
+poetry run python -u ./graphframes/graphframe.py "$@"

From 2fc4d0818f35a86874b67f86893f48b5f83d7285 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 16:59:23 -0800
Subject: [PATCH 32/53] Remove test/ folder from style paths, it doesn't exist

---
 python/pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/pyproject.toml b/python/pyproject.toml
index 84050dcc2..e21c4cc80 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -31,8 +31,8 @@ build-backend = "poetry.core.masonry.api"
 [tool.black]
 line-length = 100
 target-version = ["py39"]
-include = ["graphframes", "test"]
+include = ["graphframes"]
 
 [tool.isort]
 profile = "black"
-src_paths = ["graphframes", "test"]
+src_paths = ["graphframes"]

From 8297a13232f29f9466f8c0ac3bd577e2cbb066ea Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 17:18:13 -0800
Subject: [PATCH 33/53] Remove .vscode

---
 .gitignore | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 7036d69e3..93246acbe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,7 +27,6 @@ project/plugins/project/
 
 # Mac
 *.DS_Store
-.vscode
 
 # Python specific
 python/build

From 2035d9854344a53ce4ba77c1d6a4f7478763f963 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 17:18:42 -0800
Subject: [PATCH 34/53] VERSION back to 0.8.4

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index 7ada0d303..b60d71966 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.8.5
+0.8.4

From f9f4bd7b9dbdf0bf18e1dde83090bdc59d5fc23d Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 17:19:55 -0800
Subject: [PATCH 35/53] Remove tutorials reference

---
 python/MANIFEST.in | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/MANIFEST.in b/python/MANIFEST.in
index 4eb0ee5af..8e453d713 100644
--- a/python/MANIFEST.in
+++ b/python/MANIFEST.in
@@ -5,4 +5,3 @@
 recursive-include python/graphframes *.py
 recursive-exclude * __pycache__
 recursive-exclude * *.pyc
-include graphframes/tutorials/data/.exists

From 9ddd6b24cefc4528a9bfa75e8d8ddf3d365b8eaf Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 17:23:35 -0800
Subject: [PATCH 36/53] VERSION is a Python thing, it belongs in python/

---
 VERSION => python/VERSION | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename VERSION => python/VERSION (100%)

diff --git a/VERSION b/python/VERSION
similarity index 100%
rename from VERSION
rename to python/VERSION

From 7065647d6cf0be0513af34bf355aea21ff5a2090 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 17:33:27 -0800
Subject: [PATCH 37/53] Include the README.md and LICENSE in the Python package

---
 python/MANIFEST.in | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/MANIFEST.in b/python/MANIFEST.in
index 8e453d713..f883d48c1 100644
--- a/python/MANIFEST.in
+++ b/python/MANIFEST.in
@@ -5,3 +5,5 @@
 recursive-include python/graphframes *.py
 recursive-exclude * __pycache__
 recursive-exclude * *.pyc
+include README.md
+include LICENSE

From a6c7e91f151ae7f04268f98c52aed995855e881f Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Sun, 16 Feb 2025 17:34:21 -0800
Subject: [PATCH 38/53] Some classifiers for pyproject.toml

---
 python/pyproject.toml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/python/pyproject.toml b/python/pyproject.toml
index e21c4cc80..8c0c1ba05 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -6,6 +6,16 @@ authors = ["GraphFrames Contributors <graphframes@googlegroups.com>"]
 license = "Apache 2.0"
 readme = "README.md"
 packages = [{include = "graphframes"}]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12"
+]
 
 [tool.poetry.urls]
 "Project Homepage" = "https://graphframes.github.io/graphframes"

From 51e3e6d95d312d83e01d91151bcc90e5e9a63edf Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Mon, 17 Feb 2025 08:49:21 -0800
Subject: [PATCH 39/53] Trying poetry install action instead of manual install

---
 .github/workflows/python-ci.yml | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index 3d939db65..1095ce49e 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -31,10 +31,16 @@ jobs:
     - uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
+    - name: Install and configure Poetry
+      uses: snok/install-poetry@v1
+      with:
+        version: 2.1.1
+        virtualenvs-create: true
+        virtualenvs-in-project: false
+        installer-parallel: true
     - name: Build Python package and its dependencies
       working-directory: ./python
       run: |
-        python -m pip install --upgrade poetry
         poetry build
         poetry install
     - name: Test

From 272be064e60f3c07817533a1e02b5a0eec2b89cf Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Mon, 17 Feb 2025 08:53:55 -0800
Subject: [PATCH 40/53] Removing SPARK_HOME

---
 .github/workflows/python-ci.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index 1095ce49e..7f201a049 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -46,5 +46,4 @@ jobs:
     - name: Test
       working-directory: ./python
       run: |
-        export SPARK_HOME=$(poetry run python -c "import os; from importlib.util import find_spec; spec = find_spec('pyspark'); print(os.path.join(os.path.dirname(spec.origin)))")
         ./run-tests.sh

From 45879995d1c6c6bc22a9f82b59290f7912b5ba3b Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Mon, 17 Feb 2025 09:46:00 -0800
Subject: [PATCH 41/53] Returned SPARK_HOME settings

---
 .github/workflows/python-ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index 7f201a049..1095ce49e 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -46,4 +46,5 @@ jobs:
     - name: Test
       working-directory: ./python
       run: |
+        export SPARK_HOME=$(poetry run python -c "import os; from importlib.util import find_spec; spec = find_spec('pyspark'); print(os.path.join(os.path.dirname(spec.origin)))")
         ./run-tests.sh

From 2422b226b341cdf728fdaaf9ca109833d6ad11fe Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Mon, 17 Feb 2025 10:06:54 -0800
Subject: [PATCH 42/53] Minimized the PR to just these files

---
 python/MANIFEST.in                            |   1 +
 python/graphframes/tutorials/download.py      |  64 ++
 python/graphframes/tutorials/motif.py         | 207 +++++++
 python/graphframes/tutorials/stackexchange.py | 579 ++++++++++++++++++
 python/graphframes/tutorials/utils.py         | 122 ++++
 5 files changed, 973 insertions(+)
 create mode 100755 python/graphframes/tutorials/download.py
 create mode 100644 python/graphframes/tutorials/motif.py
 create mode 100644 python/graphframes/tutorials/stackexchange.py
 create mode 100644 python/graphframes/tutorials/utils.py

diff --git a/python/MANIFEST.in b/python/MANIFEST.in
index 73eaf8ba2..22100a328 100644
--- a/python/MANIFEST.in
+++ b/python/MANIFEST.in
@@ -2,3 +2,4 @@
 # https://github.com/pypa/sampleproject/blob/master/MANIFEST.in
 # For more details about the MANIFEST file, you may read the docs at
 # https://docs.python.org/2/distutils/sourcedist.html#the-manifest-in-template
+include graphframes/tutorials/data/.exists
diff --git a/python/graphframes/tutorials/download.py b/python/graphframes/tutorials/download.py
new file mode 100755
index 000000000..154d84c14
--- /dev/null
+++ b/python/graphframes/tutorials/download.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+import os
+import click
+import requests
+import py7zr
+
+
+@click.command()
+@click.argument("subdomain")
+@click.option(
+    "--data-dir",
+    default="python/graphframes/tutorials/data",
+    help="Directory to store downloaded files",
+)
+@click.option(
+    "--extract/--no-extract", default=True, help="Whether to extract the archive after download"
+)
+def download_stackexchange(subdomain: str, data_dir: str, extract: bool) -> None:
+    """Download Stack Exchange archive for a given SUBDOMAIN.
+
+    Example: python/graphframes/tutorials/download.py stats.meta
+
+    Note: This won't work for stackoverflow.com archives due to size.
+    """
+    # Create data directory if it doesn't exist
+    os.makedirs(data_dir, exist_ok=True)
+    # Construct archive URL and filename
+    archive_name = f"{subdomain}.stackexchange.com.7z"
+    archive_url = f"https://archive.org/download/stackexchange/{archive_name}"
+    archive_path = os.path.join(data_dir, archive_name)
+
+    click.echo(f"Downloading archive from {archive_url}")
+    try:
+        # Stream to disk; the (connect, read) timeout keeps a stalled server from hanging us
+        # forever, and the context manager releases the connection when we are done.
+        with requests.get(archive_url, stream=True, timeout=(10, 60)) as response:
+            response.raise_for_status()
+            total_size = int(response.headers.get("content-length", 0))
+            with click.progressbar(length=total_size, label="Downloading") as bar:
+                with open(archive_path, "wb") as f:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        if chunk:
+                            f.write(chunk)
+                            bar.update(len(chunk))
+        click.echo(f"Download complete: {archive_path}")
+
+        # Extract if requested
+        if extract:
+            click.echo("Extracting archive...")
+            output_dir = f"{subdomain}.stackexchange.com"
+            with py7zr.SevenZipFile(archive_path, mode="r") as z:
+                z.extractall(path=os.path.join(data_dir, output_dir))
+            click.echo(f"Extraction complete: {output_dir}")
+    except requests.exceptions.RequestException as e:
+        click.echo(f"Error downloading archive: {e}", err=True)
+        raise click.Abort()
+    except py7zr.Bad7zFile as e:
+        click.echo(f"Error extracting archive: {e}", err=True)
+        raise click.Abort()
+
+
+if __name__ == "__main__":
+    download_stackexchange()
diff --git a/python/graphframes/tutorials/motif.py b/python/graphframes/tutorials/motif.py
new file mode 100644
index 000000000..4a2189c56
--- /dev/null
+++ b/python/graphframes/tutorials/motif.py
@@ -0,0 +1,207 @@
+# Demonstrate GraphFrames network motif finding capabilities
+
+#
+# Interactive Usage: pyspark --packages graphframes:graphframes:0.8.4-spark3.5-s_2.12
+#
+# Batch Usage: spark-submit --packages graphframes:graphframes:0.8.4-spark3.5-s_2.12 python/graphframes/tutorials/motif.py
+#
+
+import pyspark.sql.functions as F
+from pyspark import SparkContext
+from pyspark.sql import DataFrame, SparkSession
+
+from graphframes import GraphFrame
+
+# Initialize a SparkSession
+spark: SparkSession = (
+    SparkSession.builder.appName("Stack Overflow Motif Analysis")
+    # Lets the Id:(Stack Overflow int) and id:(GraphFrames UUID) coexist
+    .config("spark.sql.caseSensitive", True).getOrCreate()
+)
+sc: SparkContext = spark.sparkContext
+sc.setCheckpointDir("/tmp/graphframes-checkpoints")
+
+# Change me if you download a different stackexchange site
+STACKEXCHANGE_SITE = "stats.meta.stackexchange.com"
+BASE_PATH = f"python/graphframes/tutorials/data/{STACKEXCHANGE_SITE}"
+
+#
+# Load the nodes and edges from disk, repartition, checkpoint [plan got long for some reason] and cache.
+#
+
+# We created these in stackexchange.py from Stack Exchange data dump XML files
+NODES_PATH: str = f"{BASE_PATH}/Nodes.parquet"
+nodes_df: DataFrame = spark.read.parquet(NODES_PATH)
+
+# Repartition the nodes to give our motif searches parallelism
+nodes_df = nodes_df.repartition(50).checkpoint().cache()
+
+# We created these in stackexchange.py from Stack Exchange data dump XML files
+EDGES_PATH: str = f"{BASE_PATH}/Edges.parquet"
+edges_df: DataFrame = spark.read.parquet(EDGES_PATH)
+
+# Repartition the edges to give our motif searches parallelism
+edges_df = edges_df.repartition(50).checkpoint().cache()
+
+# What kind of nodes do we have to work with?
+node_counts = (
+    nodes_df.select("id", F.col("Type").alias("Node Type"))
+    .groupBy("Node Type")
+    .count()
+    .orderBy(F.col("count").desc())
+    # Add a comma formatted column for display
+    .withColumn("count", F.format_number(F.col("count"), 0))
+)
+node_counts.show()
+
+# What kind of edges do we have to work with?
+edge_counts = (
+    edges_df.select("src", "dst", F.col("relationship").alias("Edge Type"))
+    .groupBy("Edge Type")
+    .count()
+    .orderBy(F.col("count").desc())
+    # Add a comma formatted column for display
+    .withColumn("count", F.format_number(F.col("count"), 0))
+)
+edge_counts.show()
+
+g = GraphFrame(nodes_df, edges_df)
+
+g.vertices.show(10)
+print(f"Node columns: {g.vertices.columns}")
+
+g.edges.sample(0.0001).show(10)
+
+# Sanity test that all edges have valid ids
+edge_count = g.edges.count()
+valid_edge_count = (
+    g.edges.join(g.vertices, on=g.edges.src == g.vertices.id)
+    .select("src", "dst", "relationship")
+    .join(g.vertices, on=g.edges.dst == g.vertices.id)
+    .count()
+)
+
+# Just up and die if we have edges that point to non-existent nodes
+assert (
+    edge_count == valid_edge_count
+), f"Edge count {edge_count} != valid edge count {valid_edge_count}"
+print(f"Edge count: {edge_count:,} == Valid edge count: {valid_edge_count:,}")
+
+# G4: Continuous Triangles
+paths = g.find("(a)-[e1]->(b); (b)-[e2]->(c); (c)-[e3]->(a)")
+
+# Show the first three paths
+paths.show(3)
+
+graphlet_type_df = paths.select(
+    F.col("a.Type").alias("A_Type"),
+    F.col("e1.relationship").alias("(a)-[e1]->(b)"),
+    F.col("b.Type").alias("B_Type"),
+    F.col("e2.relationship").alias("(b)-[e2]->(c)"),
+    F.col("c.Type").alias("C_Type"),
+    F.col("e3.relationship").alias("(c)-[e3]->(a)"),
+)
+
+graphlet_count_df = (
+    graphlet_type_df.groupby(
+        "A_Type", "(a)-[e1]->(b)", "B_Type", "(b)-[e2]->(c)", "C_Type", "(c)-[e3]->(a)"
+    )
+    .count()
+    .orderBy(F.col("count").desc())
+    # Add a comma formatted column for display
+    .withColumn("count", F.format_number(F.col("count"), 0))
+)
+graphlet_count_df.show()
+
+# G5: Divergent Triangles
+paths = g.find("(a)-[e1]->(b); (a)-[e2]->(c); (c)-[e3]->(b)")
+
+graphlet_type_df = paths.select(
+    F.col("a.Type").alias("A_Type"),
+    F.col("e1.relationship").alias("(a)-[e1]->(b)"),
+    F.col("b.Type").alias("B_Type"),
+    F.col("e2.relationship").alias("(a)-[e2]->(c)"),
+    F.col("c.Type").alias("C_Type"),
+    F.col("e3.relationship").alias("(c)-[e3]->(b)"),
+)
+
+graphlet_count_df = (
+    graphlet_type_df.groupby(
+        "A_Type", "(a)-[e1]->(b)", "B_Type", "(a)-[e2]->(c)", "C_Type", "(c)-[e3]->(b)"
+    )
+    .count()
+    .orderBy(F.col("count").desc())
+    # Add a comma formatted column for display
+    .withColumn("count", F.format_number(F.col("count"), 0))
+)
+graphlet_count_df.show()
+
+# G17: A directed 3-path is a surprisingly diverse graphlet
+paths = g.find("(a)-[e1]->(b); (b)-[e2]->(c); (d)-[e3]->(c)")
+
+# Summarize these 3-paths by counting instances of paths by node / edge type
+graphlet_type_df = paths.select(
+    F.col("a.Type").alias("A_Type"),
+    F.col("e1.relationship").alias("(a)-[e1]->(b)"),
+    F.col("b.Type").alias("B_Type"),
+    F.col("e2.relationship").alias("(b)-[e2]->(c)"),
+    F.col("c.Type").alias("C_Type"),
+    F.col("e3.relationship").alias("(d)-[e3]->(c)"),
+    F.col("d.Type").alias("D_Type"),
+)
+graphlet_count_df = (
+    graphlet_type_df.groupby(
+        "A_Type",
+        "(a)-[e1]->(b)",
+        "B_Type",
+        "(b)-[e2]->(c)",
+        "C_Type",
+        "(d)-[e3]->(c)",
+        "D_Type",
+    )
+    .count()
+    .orderBy(F.col("count").desc())
+    # Add a comma formatted column for display
+    .withColumn("count", F.format_number(F.col("count"), 0))
+)
+graphlet_count_df.show()
+
+graphlet_count_df.orderBy(
+    [
+        "A_Type",
+        "(a)-[e1]->(b)",
+        "B_Type",
+        "(b)-[e2]->(c)",
+        "C_Type",
+        "(d)-[e3]->(c)",
+        "D_Type",
+    ],
+    ascending=False,
+).show(104)
+
+# A vote is cast for a question that links to another question, which also received a vote.
+linked_vote_paths = paths.filter(
+    (F.col("a.Type") == "Vote")
+    & (F.col("e1.relationship") == "CastFor")
+    & (F.col("b.Type") == "Question")
+    & (F.col("e2.relationship") == "Links")
+    & (F.col("c.Type") == "Question")
+    & (F.col("e3.relationship") == "CastFor")
+    & (F.col("d.Type") == "Vote")
+)
+
+# Sanity check the count - it should match the table above
+linked_vote_paths.count()
+
+b_vote_counts = linked_vote_paths.select("a", "b").distinct().groupBy("b").count()
+c_vote_counts = linked_vote_paths.select("c", "d").distinct().groupBy("c").count()
+
+linked_vote_counts = (
+    linked_vote_paths.filter((F.col("a.VoteTypeId") == 2) & (F.col("d.VoteTypeId") == 2))
+    .select("b", "c")
+    .join(b_vote_counts, on="b", how="inner")
+    .withColumnRenamed("count", "b_count")
+    .join(c_vote_counts, on="c", how="inner")
+    .withColumnRenamed("count", "c_count")
+)
+linked_vote_counts.stat.corr("b_count", "c_count")
diff --git a/python/graphframes/tutorials/stackexchange.py b/python/graphframes/tutorials/stackexchange.py
new file mode 100644
index 000000000..c52f323bb
--- /dev/null
+++ b/python/graphframes/tutorials/stackexchange.py
@@ -0,0 +1,579 @@
+# Build a Graph out of the Stack Exchange Data Dump XML files
+
+#
+# Interactive Usage: pyspark --packages com.databricks:spark-xml_2.12:0.18.0
+#
+# Batch Usage: spark-submit --packages com.databricks:spark-xml_2.12:0.18.0 python/graphframes/tutorials/stackexchange.py
+#
+
+import re
+from typing import List, Tuple
+
+import pyspark.sql.functions as F
+import pyspark.sql.types as T
+from pyspark.sql import DataFrame, SparkSession
+
+# Change me if you download a different stackexchange site
+STACKEXCHANGE_SITE = "stats.meta.stackexchange.com"
+BASE_PATH = f"python/graphframes/tutorials/data/{STACKEXCHANGE_SITE}"
+
+
+#
+# Some utility functions
+#
+
+
+def remove_prefix(df: DataFrame) -> DataFrame:
+    """Strip the leading "_" from the column names of the DataFrame.
+
+    Only a real "_" prefix is removed; a column without it keeps its full name.
+    Column order and values are unchanged. The "_" is presumably the XML reader's
+    attribute prefix - confirm against the reader options if they are customized.
+    """
+    # str.removeprefix requires Python 3.9+, which is this package's minimum target
+    renamed = [F.col(name).alias(name.removeprefix("_")) for name in df.columns]
+    return df.select(renamed)
+
+
+@F.udf(returnType=T.ArrayType(T.StringType()))
+def split_tags(tags: str) -> List[str]:
+    """Return the tag names found in a "<a><b>"-style string; empty or null input gives []."""
+    # Each tag is the text between one "<" and the next ">"
+    tag_pattern = r"<([^>]+)>"
+    return re.findall(tag_pattern, tags) if tags else []
+
+
+#
+# Initialize a SparkSession with case sensitivity
+#
+
+spark: SparkSession = (
+    SparkSession.builder.appName("Stack Exchange Graph Builder")
+    # Lets the Id:(Stack Overflow int) and id:(GraphFrames UUID) coexist
+    .config("spark.sql.caseSensitive", True).getOrCreate()
+)
+
+print(f"Loading data for {STACKEXCHANGE_SITE} ...")
+
+
+#
+# Load the Posts...
+#
+posts_df: DataFrame = (
+    spark.read.format("xml")
+    .options(rowTag="row")
+    .options(rootTag="posts")
+    .load(f"{BASE_PATH}/Posts.xml")
+)
+print(f"\nTotal Posts:       {posts_df.count():,}")
+
+# Remove the _ prefix from field names
+posts_df = remove_prefix(posts_df)
+
+# Create a list of tags
+posts_df = (
+    posts_df.withColumn(
+        "ParsedTags", F.split(F.regexp_replace(F.col("Tags"), "^\\||\\|$", ""), "\\|")
+    )
+    .drop("Tags")
+    .withColumnRenamed("ParsedTags", "Tags")
+)
+
+
+#
+# Building blocks: separate the questions and answers
+#
+
+# Do the questions look ok? Questions have NO parent ID and DO have a Title
+questions_df: DataFrame = posts_df.filter(posts_df.ParentId.isNull())
+questions_df = questions_df.withColumn("Type", F.lit("Question")).cache()
+print(f"\nTotal questions: {questions_df.count():,}\n")
+
+questions_df.select("ParentId", "Title", "Body").show(10)
+
+# Answers DO have a ParentId parent post and no Title
+answers_df: DataFrame = posts_df.filter(posts_df.ParentId.isNotNull())
+answers_df = answers_df.withColumn("Type", F.lit("Answer")).cache()
+print(f"\nTotal answers: {answers_df.count():,}\n")
+
+answers_df.select("ParentId", "Title", "Body").show(10)
+
+
+#
+# Load the PostLinks...
+#
+
+post_links_df = (
+    spark.read.format("xml")
+    .options(rowTag="row")
+    .options(rootTag="postlinks")
+    .load(f"{BASE_PATH}/PostLinks.xml")
+)
+print(f"Total PostLinks:   {post_links_df.count():,}")
+
+# Remove the _ prefix from field names
+post_links_df = (
+    remove_prefix(post_links_df)
+    .withColumn(
+        "LinkType",
+        F.when(F.col("LinkTypeId") == 1, "Linked")
+        .when(F.col("LinkTypeId") == 3, "Duplicate")
+        .otherwise("Unknown"),
+    )
+    .withColumn("Type", F.lit("PostLinks"))
+)
+
+
+#
+# Load the PostHistory...
+#
+
+post_history_df = (
+    spark.read.format("xml")
+    .options(rowTag="row")
+    .options(rootTag="posthistory")
+    .load(f"{BASE_PATH}/PostHistory.xml")
+)
+print(f"Total PostHistory: {post_history_df.count():,}")
+
+# Remove the _ prefix from field names
+post_history_df = remove_prefix(post_history_df).withColumn("Type", F.lit("PostHistory"))
+
+
+#
+# Load the Comments...
+#
+
+comments_df = (
+    spark.read.format("xml")
+    .options(rowTag="row")
+    .options(rootTag="comments")
+    .load(f"{BASE_PATH}/Comments.xml")
+)
+print(f"Total Comments:    {comments_df.count():,}")
+
+# Remove the _ prefix from field names
+comments_df = remove_prefix(comments_df).withColumn("Type", F.lit("Comment"))
+
+
+#
+# Load the Users...
+#
+
+users_df = (
+    spark.read.format("xml")
+    .options(rowTag="row")
+    .options(rootTag="users")
+    .load(f"{BASE_PATH}/Users.xml")
+)
+print(f"Total Users:       {users_df.count():,}")
+
+# Remove the _ prefix from field names
+users_df = remove_prefix(users_df).withColumn("Type", F.lit("User"))
+
+
+#
+# Load the Votes...
+#
+
+votes_df = (
+    spark.read.format("xml")
+    .options(rowTag="row")
+    .options(rootTag="votes")
+    .load(f"{BASE_PATH}/Votes.xml")
+)
+print(f"Total Votes:       {votes_df.count():,}")
+
+# Remove the _ prefix from field names
+votes_df = remove_prefix(votes_df).withColumn("Type", F.lit("Vote"))
+
+# Add a VoteType column
+votes_df = votes_df.withColumn(
+    "VoteType",
+    F.when(F.col("VoteTypeId") == 2, "UpVote")
+    .when(F.col("VoteTypeId") == 3, "DownVote")
+    .when(F.col("VoteTypeId") == 4, "Favorite")
+    .when(F.col("VoteTypeId") == 5, "Close")
+    .when(F.col("VoteTypeId") == 6, "Reopen")
+    .when(F.col("VoteTypeId") == 7, "BountyStart")
+    .when(F.col("VoteTypeId") == 8, "BountyClose")
+    .when(F.col("VoteTypeId") == 9, "Deletion")
+    .when(F.col("VoteTypeId") == 10, "Undeletion")
+    .when(F.col("VoteTypeId") == 11, "Spam")
+    .when(F.col("VoteTypeId") == 12, "InformModerator")
+    .otherwise("Unknown"),
+)
+
+
+#
+# Load the Tags...
+#
+
+tags_df = (
+    spark.read.format("xml")
+    .options(rowTag="row")
+    .options(rootTag="tags")
+    .load(f"{BASE_PATH}/Tags.xml")
+)
+print(f"Total Tags:        {tags_df.count():,}")
+
+# Remove the _ prefix from field names
+tags_df = remove_prefix(tags_df).withColumn("Type", F.lit("Tag"))
+
+
+#
+# Load the Badges...
+#
+
+badges_df = (
+    spark.read.format("xml")
+    .options(rowTag="row")
+    .options(rootTag="badges")
+    .load(f"{BASE_PATH}/Badges.xml")
+)
+print(f"Total Badges:      {badges_df.count():,}\n")
+
+# Remove the _ prefix from field names
+badges_df = remove_prefix(badges_df).withColumn("Type", F.lit("Badge"))
+
+
+#
+# Form the nodes from the UNION of posts, users, votes and their combined schemas
+#
+
+all_cols: List[Tuple[str, T.StructField]] = list(
+    set(
+        list(zip(answers_df.columns, answers_df.schema))
+        + list(zip(questions_df.columns, questions_df.schema))
+        + list(zip(post_links_df.columns, post_links_df.schema))
+        + list(zip(comments_df.columns, comments_df.schema))
+        + list(zip(users_df.columns, users_df.schema))
+        + list(zip(votes_df.columns, votes_df.schema))
+        + list(zip(tags_df.columns, tags_df.schema))
+        + list(zip(badges_df.columns, badges_df.schema))
+    )
+)
+all_column_names: List[str] = sorted([x[0] for x in all_cols])
+
+
+def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]) -> DataFrame:
+    """Return df plus a typed null column for each all_cols name that df does not have yet."""
+    for name, field in all_cols:
+        absent = name not in df.columns
+        df = df.withColumn(name, F.lit(None).cast(field.dataType)) if absent else df
+    return df
+
+
+# Now apply this function to each of your DataFrames to get a consistent schema
+# posts_df = add_missing_columns(posts_df, all_cols).select(all_column_names)
+questions_df = add_missing_columns(questions_df, all_cols).select(all_column_names)
+answers_df = add_missing_columns(answers_df, all_cols).select(all_column_names)
+post_links_df = add_missing_columns(post_links_df, all_cols).select(all_column_names)
+users_df = add_missing_columns(users_df, all_cols).select(all_column_names)
+votes_df = add_missing_columns(votes_df, all_cols).select(all_column_names)
+tags_df = add_missing_columns(tags_df, all_cols).select(all_column_names)
+badges_df = add_missing_columns(badges_df, all_cols).select(all_column_names)
+assert (
+    set(questions_df.columns)
+    == set(answers_df.columns)
+    == set(post_links_df.columns)
+    == set(users_df.columns)
+    == set(votes_df.columns)
+    == set(all_column_names)
+    == set(tags_df.columns)
+    == set(badges_df.columns)
+)
+
+# Now union them together and remove duplicates
+nodes_df: DataFrame = (
+    questions_df.unionByName(answers_df)
+    .unionByName(post_links_df)
+    .unionByName(users_df)
+    .unionByName(votes_df)
+    .unionByName(tags_df)
+    .unionByName(badges_df)
+    .distinct()
+)
+print(f"Total distinct nodes: {nodes_df.count():,}")
+
+# Now add a unique ID field
+nodes_df = nodes_df.withColumn("id", F.expr("uuid()")).select("id", *all_column_names)
+
+# Now create posts - combined questions and answers for things that can apply to them both
+posts_df = questions_df.unionByName(answers_df).cache()
+
+#
+# Store the nodes to disk, reload and cache
+#
+
+NODES_PATH: str = f"{BASE_PATH}/Nodes.parquet"
+
+# Write to disk and load back again
+nodes_df.write.mode("overwrite").parquet(NODES_PATH)
+nodes_df = spark.read.parquet(NODES_PATH)
+
+nodes_df.select("id", "Type").groupBy("Type").count().orderBy(F.col("count").desc()).show()
+
+# +---------+------+
+# |     Type| count|
+# +---------+------+
+# |    Badge|43,029|
+# |     Vote|42,593|
+# |     User|37,709|
+# |   Answer| 2,978|
+# | Question| 2,025|
+# |PostLinks| 1,274|
+# |      Tag|   143|
+# +---------+------+
+
+# Helps performance of GraphFrames' algorithms
+nodes_df = nodes_df.cache()
+
+# Make sure we have the right columns and cached data
+posts_df = nodes_df.filter(nodes_df.Type.isin("Question", "Answer")).cache()
+questions_df = nodes_df.filter(nodes_df.Type == "Question").cache()
+answers_df = nodes_df.filter(nodes_df.Type == "Answer").cache()
+post_links_df = nodes_df.filter(nodes_df.Type == "PostLinks").cache()
+users_df = nodes_df.filter(nodes_df.Type == "User").cache()
+votes_df = nodes_df.filter(nodes_df.Type == "Vote").cache()
+tags_df = nodes_df.filter(nodes_df.Type == "Tag").cache()
+badges_df = nodes_df.filter(nodes_df.Type == "Badge").cache()
+
+
+#
+# Build the edges DataFrame:
+#
+# * [Vote]--CastFor-->[Post]
+# * [User]--Asks-->[Question]
+# * [User]--Posts-->[Answer]
+# * [Post]--Answers-->[Question]
+# * [Tag]--Tags-->[Post]
+# * [User]--Earns-->[Badge]
+# * [Post]--Links-->[Post]
+#
+# Remember: 'src', 'dst' and 'relationship' are standard edge fields in GraphFrames
+# Remember: we must produce src/dst based on lowercase 'id' UUID, not 'Id' which is Stack Overflow's integer.
+#
+
+#
+# Create a [Vote]--CastFor-->[Post] edge... remember a Post is a Question or Answer
+#
+
+src_vote_df: DataFrame = votes_df.select(
+    F.col("id").alias("src"),
+    F.col("Id").alias("VoteId"),
+    # Everything has all the fields - should build from base records but need UUIDs
+    F.col("PostId").alias("VotePostId"),
+)
+cast_for_edge_df: DataFrame = src_vote_df.join(
+    posts_df, on=src_vote_df.VotePostId == posts_df.Id, how="inner"
+).select(
+    # 'src' comes from the votes' 'id'
+    "src",
+    # 'dst' comes from the posts' 'id'
+    F.col("id").alias("dst"),
+    # All edges have a 'relationship' field
+    F.lit("CastFor").alias("relationship"),
+)
+print(f"Total CastFor edges: {cast_for_edge_df.count():,}")
+print(f"Percentage of linked votes: {cast_for_edge_df.count() / votes_df.count():.2%}\n")
+
+#
+# Create a [User]--Asks-->[Question] edge
+#
+
+questions_asked_df: DataFrame = questions_df.select(
+    F.col("OwnerUserId").alias("QuestionUserId"),
+    F.col("id").alias("dst"),
+    F.lit("Asks").alias("relationship"),
+)
+user_asks_edges_df: DataFrame = questions_asked_df.join(
+    users_df, on=questions_asked_df.QuestionUserId == users_df.Id, how="inner"
+).select(
+    # 'src' comes from the users' 'id'
+    F.col("id").alias("src"),
+    # 'dst' comes from the posts' 'id'
+    "dst",
+    # All edges have a 'relationship' field
+    "relationship",
+)
+print(f"Total Asks edges: {user_asks_edges_df.count():,}")
+print(
+    f"Percentage of asked questions linked to users: {user_asks_edges_df.count() / questions_df.count():.2%}\n"
+)
+
+#
+# Create a [User]--Posts-->[Answer] edge.
+#
+
+user_answers_df: DataFrame = answers_df.select(
+    F.col("OwnerUserId").alias("AnswerUserId"),
+    F.col("id").alias("dst"),
+    F.lit("Posts").alias("relationship"),
+)
+user_answers_edges_df = user_answers_df.join(
+    users_df, on=user_answers_df.AnswerUserId == users_df.Id, how="inner"
+).select(
+    # 'src' comes from the users' 'id'
+    F.col("id").alias("src"),
+    # 'dst' comes from the posts' 'id'
+    "dst",
+    # All edges have a 'relationship' field
+    "relationship",
+)
+print(f"Total User Answers edges: {user_answers_edges_df.count():,}")
+print(
+    f"Percentage of answers linked to users: {user_answers_edges_df.count() / answers_df.count():.2%}\n"
+)
+
+#
+# Create a [Answer]--Answers-->[Question] edge
+#
+
+src_answers_df: DataFrame = answers_df.select(
+    F.col("id").alias("src"),
+    F.col("Id").alias("AnswerId"),
+    F.col("ParentId").alias("AnswerParentId"),
+)
+question_answers_edges_df: DataFrame = src_answers_df.join(
+    questions_df, on=src_answers_df.AnswerParentId == questions_df.Id, how="inner"
+).select(
+    # 'src' comes from the answers' 'id'
+    "src",
+    # 'dst' comes from the questions' 'id'
+    F.col("id").alias("dst"),
+    # All edges have a 'relationship' field
+    F.lit("Answers").alias("relationship"),
+)
+print(f"Total Posts Answers edges: {question_answers_edges_df.count():,}")
+print(
+    f"Percentage of linked answers: {question_answers_edges_df.count() / answers_df.count():.2%}\n"
+)
+
+#
+# Create a [Tag]--Tags-->[Post] edge... remember a Post is a Question or Answer
+#
+
+src_tags_df: DataFrame = posts_df.select(
+    F.col("id").alias("dst"),
+    # Tags is already an array (split when the posts were loaded): emit one row per tag
+    F.explode("Tags").alias("Tag"),
+)
+tags_edge_df: DataFrame = src_tags_df.join(
+    tags_df, on=src_tags_df.Tag == tags_df.TagName, how="inner"
+).select(
+    # 'src' comes from the tags' 'id'
+    F.col("id").alias("src"),
+    # 'dst' comes from the posts' 'id'
+    "dst",
+    # All edges have a 'relationship' field
+    F.lit("Tags").alias("relationship"),
+)
+print(f"Total Tags edges: {tags_edge_df.count():,}")
+print(f"Percentage of linked tags: {tags_edge_df.count() / posts_df.count():.2%}\n")
+
+#
+# Create a [User]--Earns-->[Badge] edge
+#
+
+earns_edges_df: DataFrame = badges_df.select(
+    F.col("UserId").alias("BadgeUserId"),
+    F.col("id").alias("dst"),
+    F.lit("Earns").alias("relationship"),
+)
+earns_edges_df = earns_edges_df.join(
+    users_df, on=earns_edges_df.BadgeUserId == users_df.Id, how="inner"
+).select(
+    # 'src' comes from the users' 'id'
+    F.col("id").alias("src"),
+    # 'dst' comes from the badges' 'id'
+    "dst",
+    # All edges have a 'relationship' field
+    "relationship",
+)
+print(f"Total Earns edges: {earns_edges_df.count():,}")
+print(f"Percentage of earned badges: {earns_edges_df.count() / badges_df.count():.2%}\n")
+
+#
+# Create a [Post]--Links-->[Post] edge... remember a Post is a Question or Answer
+# Also a   [Post]--Duplicates-->[Post] edge... remember a Post is a Question or Answer
+#
+
+trim_links_df: DataFrame = post_links_df.select(
+    F.col("PostId").alias("SrcPostId"),
+    F.col("RelatedPostId").alias("DstPostId"),
+    "LinkType",
+)
+links_src_edge_df: DataFrame = trim_links_df.join(
+    posts_df.drop("LinkType"), on=trim_links_df.SrcPostId == posts_df.Id, how="inner"
+).select(
+    # 'src' comes from the linking posts' 'id'
+    F.col("id").alias("src"),
+    "DstPostId",
+    "LinkType",
+)
+raw_links_edge_df = links_src_edge_df.join(
+    posts_df.drop("LinkType"), on=links_src_edge_df.DstPostId == posts_df.Id, how="inner"
+).select(
+    "src",
+    # 'dst' comes from the related posts' 'id'
+    F.col("id").alias("dst"),
+    # All edges have a 'relationship' field
+    F.lit("Links").alias("relationship"),
+    "LinkType",
+)
+
+duplicates_edge_df: DataFrame = (
+    raw_links_edge_df.filter(F.col("LinkType") == "Duplicate")
+    .withColumn("relationship", F.lit("Duplicates"))
+    .select("src", "dst", "relationship")
+)
+print(f"Total Duplicates edges: {duplicates_edge_df.count():,}")
+print(f"Percentage of duplicate posts: {duplicates_edge_df.count() / post_links_df.count():.2%}\n")
+
+linked_edge_df = (
+    raw_links_edge_df.filter(F.col("LinkType") == "Linked")
+    .withColumn("relationship", F.lit("Links"))
+    .select("src", "dst", "relationship")
+)
+print(f"Total Links edges: {linked_edge_df.count():,}")
+print(f"Percentage of linked posts: {linked_edge_df.count() / post_links_df.count():.2%}\n")
+
+
+#
+# Combine all the edges together into one relationships DataFrame
+#
+
+relationships_df: DataFrame = (
+    cast_for_edge_df.unionByName(user_asks_edges_df)
+    .unionByName(user_answers_edges_df)
+    .unionByName(question_answers_edges_df)
+    .unionByName(tags_edge_df)
+    .unionByName(earns_edges_df)
+    .unionByName(duplicates_edge_df)
+    .unionByName(linked_edge_df)
+)
+relationships_df.groupBy("relationship").count().orderBy(F.col("count").desc()).withColumn(
+    "count", F.format_number(F.col("count"), 0)
+).show()
+
+# +------------+------+
+# |relationship| count|
+# +------------+------+
+# |       Earns|43,029|
+# |     CastFor|40,701|
+# |        Tags| 4,427|
+# |     Answers| 2,978|
+# |       Posts| 2,767|
+# |        Asks| 1,934|
+# |       Links| 1,180|
+# |  Duplicates|    88|
+# +------------+------+
+
+EDGES_PATH: str = f"{BASE_PATH}/Edges.parquet"
+
+# Write the edges to disk
+relationships_df.write.mode("overwrite").parquet(EDGES_PATH)
+
+spark.stop()
+print("Spark stopped.")
diff --git a/python/graphframes/tutorials/utils.py b/python/graphframes/tutorials/utils.py
new file mode 100644
index 000000000..54ef40f8b
--- /dev/null
+++ b/python/graphframes/tutorials/utils.py
@@ -0,0 +1,122 @@
+from pyspark.sql import DataFrame
+from graphframes import GraphFrame
+from pyspark.sql import functions as F
+
+
+def three_edge_count(paths: DataFrame) -> DataFrame:
+    """Count the distinct type signatures found among three-edge paths.
+
+    Parameters
+    ----------
+    paths : pyspark.sql.DataFrame
+        Path rows with struct columns a, e1, b, e2, c, e3 and d, where the
+        vertex structs carry 'Type' and the edge structs carry 'relationship'.
+
+    Returns
+    -------
+    DataFrame
+        One row per grouped signature with a comma-formatted string 'count',
+        ordered from the most to the least frequent.
+    """
+    # Flatten every path into its vertex types and edge relationships.
+    projection = [
+        F.col("a.Type").alias("A_Type"),
+        F.col("e1.relationship").alias("E_relationship"),
+        F.col("b.Type").alias("B_Type"),
+        F.col("e2.relationship").alias("E2_relationship"),
+        F.col("c.Type").alias("C_Type"),
+        F.col("e3.relationship").alias("E3_relationship"),
+        F.when(F.col("d").isNotNull(), F.col("d.Type")).alias("D_Type"),
+    ]
+
+    # D_Type is projected above but is not one of the grouping keys.
+    keys = ("A_Type", "E_relationship", "B_Type", "E2_relationship", "C_Type", "E3_relationship")
+    tallies = paths.select(*projection).groupby(*keys).count()
+    ranked = tallies.orderBy(F.col("count").desc())
+    # Overwrite the numeric count with its comma-formatted string form.
+    return ranked.withColumn("count", F.format_number(F.col("count"), 0))
+
+
+def four_edge_count(paths: DataFrame) -> DataFrame:
+    """Count the distinct type signatures found among four-edge paths.
+
+    Parameters
+    ----------
+    paths : DataFrame
+        Path rows with struct columns a, e1, b, e2, c, e3, d, e4 and e, where
+        the vertex structs carry 'Type' and the edge structs carry 'relationship'.
+
+    Returns
+    -------
+    DataFrame
+        One row per grouped signature with a comma-formatted string 'count',
+        ordered from the most to the least frequent.
+    """
+    # Flatten every path into its vertex types and edge relationships.
+    projection = [
+        F.col("a.Type").alias("A_Type"),
+        F.col("e1.relationship").alias("E_relationship"),
+        F.col("b.Type").alias("B_Type"),
+        F.col("e2.relationship").alias("E2_relationship"),
+        F.col("c.Type").alias("C_Type"),
+        F.col("e3.relationship").alias("E3_relationship"),
+        F.col("d.Type").alias("D_Type"),
+        F.col("e4.relationship").alias("E4_relationship"),
+        F.when(F.col("e").isNotNull(), F.col("e.Type")).alias("E_Type"),
+    ]
+    # E_Type is projected above but is not one of the grouping keys.
+    keys = [
+        "A_Type",
+        "E_relationship",
+        "B_Type",
+        "E2_relationship",
+        "C_Type",
+        "E3_relationship",
+        "D_Type",
+        "E4_relationship",
+    ]
+    tallies = paths.select(*projection).groupby(*keys).count()
+    ranked = tallies.orderBy(F.col("count").desc())
+    # Overwrite the numeric count with its comma-formatted string form.
+    return ranked.withColumn("count", F.format_number(F.col("count"), 0))
+
+
+def add_degree(g: GraphFrame) -> GraphFrame:
+    """Build a GraphFrame whose vertices also carry the columns of g.degrees.
+
+    Parameters
+    ----------
+    g : GraphFrame
+        The graph whose vertex DataFrame is joined with ``g.degrees`` on 'id'
+
+    Returns
+    -------
+    GraphFrame
+        A new GraphFrame over the joined vertices and the original edges
+    """
+    # Default (inner) join on 'id': only vertices listed in g.degrees are kept.
+    return GraphFrame(g.vertices.join(g.degrees, on="id"), g.edges)
+
+
+def add_type_degree(g: GraphFrame) -> DataFrame:
+    """Add a map of outgoing edge counts per relationship to the vertices.
+
+    Parameters
+    ----------
+    g : GraphFrame
+        Graph whose edges carry a 'relationship' column; null src/relationship rows are skipped
+
+    Returns
+    -------
+    DataFrame
+        g.vertices plus a map 'type_degree' of relationship -> count (null without out-edges)
+    """
+    per_type: DataFrame = (
+        g.edges.select(F.col("src").alias("id"), "relationship")
+        .filter(F.col("id").isNotNull() & F.col("relationship").isNotNull())
+        .groupby("id", "relationship")
+        .count()
+    )
+    entries = F.collect_list(F.struct("relationship", "count"))
+    type_degree = per_type.groupby("id").agg(F.map_from_entries(entries).alias("type_degree"))
+    return g.vertices.join(type_degree, on="id", how="left")

From 0a1fabad7ba44d8463b0b4b23cdb360181b583cb Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Mon, 17 Feb 2025 10:37:23 -0800
Subject: [PATCH 43/53] Created tutorials dependency group to minimize main
 bloat

---
 python/poetry.lock    | 848 +++++++++++++++++++++++++++++++++++++++++-
 python/pyproject.toml |   5 +
 2 files changed, 850 insertions(+), 3 deletions(-)

diff --git a/python/poetry.lock b/python/poetry.lock
index 0fb5fb139..a96131b72 100644
--- a/python/poetry.lock
+++ b/python/poetry.lock
@@ -47,13 +47,385 @@ d = ["aiohttp (>=3.10)"]
 jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
 uvloop = ["uvloop (>=0.15.2)"]
 
+[[package]]
+name = "brotli"
+version = "1.1.0"
+description = "Python bindings for the Brotli compression library"
+optional = false
+python-versions = "*"
+groups = ["tutorials"]
+markers = "platform_python_implementation == \"CPython\""
+files = [
+    {file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e1140c64812cb9b06c922e77f1c26a75ec5e3f0fb2bf92cc8c58720dec276752"},
+    {file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c8fd5270e906eef71d4a8d19b7c6a43760c6abcfcc10c9101d14eb2357418de9"},
+    {file = "Brotli-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ae56aca0402a0f9a3431cddda62ad71666ca9d4dc3a10a142b9dce2e3c0cda3"},
+    {file = "Brotli-1.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:43ce1b9935bfa1ede40028054d7f48b5469cd02733a365eec8a329ffd342915d"},
+    {file = "Brotli-1.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7c4855522edb2e6ae7fdb58e07c3ba9111e7621a8956f481c68d5d979c93032e"},
+    {file = "Brotli-1.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:38025d9f30cf4634f8309c6874ef871b841eb3c347e90b0851f63d1ded5212da"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6a904cb26bfefc2f0a6f240bdf5233be78cd2488900a2f846f3c3ac8489ab80"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a37b8f0391212d29b3a91a799c8e4a2855e0576911cdfb2515487e30e322253d"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e84799f09591700a4154154cab9787452925578841a94321d5ee8fb9a9a328f0"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f66b5337fa213f1da0d9000bc8dc0cb5b896b726eefd9c6046f699b169c41b9e"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5dab0844f2cf82be357a0eb11a9087f70c5430b2c241493fc122bb6f2bb0917c"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e4fe605b917c70283db7dfe5ada75e04561479075761a0b3866c081d035b01c1"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:1e9a65b5736232e7a7f91ff3d02277f11d339bf34099a56cdab6a8b3410a02b2"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:58d4b711689366d4a03ac7957ab8c28890415e267f9b6589969e74b6e42225ec"},
+    {file = "Brotli-1.1.0-cp310-cp310-win32.whl", hash = "sha256:be36e3d172dc816333f33520154d708a2657ea63762ec16b62ece02ab5e4daf2"},
+    {file = "Brotli-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:0c6244521dda65ea562d5a69b9a26120769b7a9fb3db2fe9545935ed6735b128"},
+    {file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a3daabb76a78f829cafc365531c972016e4aa8d5b4bf60660ad8ecee19df7ccc"},
+    {file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c8146669223164fc87a7e3de9f81e9423c67a79d6b3447994dfb9c95da16e2d6"},
+    {file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30924eb4c57903d5a7526b08ef4a584acc22ab1ffa085faceb521521d2de32dd"},
+    {file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ceb64bbc6eac5a140ca649003756940f8d6a7c444a68af170b3187623b43bebf"},
+    {file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a469274ad18dc0e4d316eefa616d1d0c2ff9da369af19fa6f3daa4f09671fd61"},
+    {file = "Brotli-1.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524f35912131cc2cabb00edfd8d573b07f2d9f21fa824bd3fb19725a9cf06327"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5b3cc074004d968722f51e550b41a27be656ec48f8afaeeb45ebf65b561481dd"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:19c116e796420b0cee3da1ccec3b764ed2952ccfcc298b55a10e5610ad7885f9"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:510b5b1bfbe20e1a7b3baf5fed9e9451873559a976c1a78eebaa3b86c57b4265"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a1fd8a29719ccce974d523580987b7f8229aeace506952fa9ce1d53a033873c8"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c247dd99d39e0338a604f8c2b3bc7061d5c2e9e2ac7ba9cc1be5a69cb6cd832f"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1b2c248cd517c222d89e74669a4adfa5577e06ab68771a529060cf5a156e9757"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2a24c50840d89ded6c9a8fdc7b6ed3692ed4e86f1c4a4a938e1e92def92933e0"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f31859074d57b4639318523d6ffdca586ace54271a73ad23ad021acd807eb14b"},
+    {file = "Brotli-1.1.0-cp311-cp311-win32.whl", hash = "sha256:39da8adedf6942d76dc3e46653e52df937a3c4d6d18fdc94a7c29d263b1f5b50"},
+    {file = "Brotli-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:aac0411d20e345dc0920bdec5548e438e999ff68d77564d5e9463a7ca9d3e7b1"},
+    {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:32d95b80260d79926f5fab3c41701dbb818fde1c9da590e77e571eefd14abe28"},
+    {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b760c65308ff1e462f65d69c12e4ae085cff3b332d894637f6273a12a482d09f"},
+    {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409"},
+    {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2"},
+    {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451"},
+    {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f4bf76817c14aa98cc6697ac02f3972cb8c3da93e9ef16b9c66573a68014f91"},
+    {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0c5516f0aed654134a2fc936325cc2e642f8a0e096d075209672eb321cff408"},
+    {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c3020404e0b5eefd7c9485ccf8393cfb75ec38ce75586e046573c9dc29967a0"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ed11165dd45ce798d99a136808a794a748d5dc38511303239d4e2363c0695dc"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:87a3044c3a35055527ac75e419dfa9f4f3667a1e887ee80360589eb8c90aabb9"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c5529b34c1c9d937168297f2c1fde7ebe9ebdd5e121297ff9c043bdb2ae3d6fb"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ca63e1890ede90b2e4454f9a65135a4d387a4585ff8282bb72964fab893f2111"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e79e6520141d792237c70bcd7a3b122d00f2613769ae0cb61c52e89fd3443839"},
+    {file = "Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0"},
+    {file = "Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951"},
+    {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8bf32b98b75c13ec7cf774164172683d6e7891088f6316e54425fde1efc276d5"},
+    {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7bc37c4d6b87fb1017ea28c9508b36bbcb0c3d18b4260fcdf08b200c74a6aee8"},
+    {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c0ef38c7a7014ffac184db9e04debe495d317cc9c6fb10071f7fefd93100a4f"},
+    {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91d7cc2a76b5567591d12c01f019dd7afce6ba8cba6571187e21e2fc418ae648"},
+    {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a93dde851926f4f2678e704fadeb39e16c35d8baebd5252c9fd94ce8ce68c4a0"},
+    {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0db75f47be8b8abc8d9e31bc7aad0547ca26f24a54e6fd10231d623f183d089"},
+    {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6967ced6730aed543b8673008b5a391c3b1076d834ca438bbd70635c73775368"},
+    {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7eedaa5d036d9336c95915035fb57422054014ebdeb6f3b42eac809928e40d0c"},
+    {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d487f5432bf35b60ed625d7e1b448e2dc855422e87469e3f450aa5552b0eb284"},
+    {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832436e59afb93e1836081a20f324cb185836c617659b07b129141a8426973c7"},
+    {file = "Brotli-1.1.0-cp313-cp313-win32.whl", hash = "sha256:43395e90523f9c23a3d5bdf004733246fba087f2948f87ab28015f12359ca6a0"},
+    {file = "Brotli-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:9011560a466d2eb3f5a6e4929cf4a09be405c64154e12df0dd72713f6500e32b"},
+    {file = "Brotli-1.1.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a090ca607cbb6a34b0391776f0cb48062081f5f60ddcce5d11838e67a01928d1"},
+    {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de9d02f5bda03d27ede52e8cfe7b865b066fa49258cbab568720aa5be80a47d"},
+    {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2333e30a5e00fe0fe55903c8832e08ee9c3b1382aacf4db26664a16528d51b4b"},
+    {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4d4a848d1837973bf0f4b5e54e3bec977d99be36a7895c61abb659301b02c112"},
+    {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:fdc3ff3bfccdc6b9cc7c342c03aa2400683f0cb891d46e94b64a197910dc4064"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:5eeb539606f18a0b232d4ba45adccde4125592f3f636a6182b4a8a436548b914"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:fd5f17ff8f14003595ab414e45fce13d073e0762394f957182e69035c9f3d7c2"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:069a121ac97412d1fe506da790b3e69f52254b9df4eb665cd42460c837193354"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:e93dfc1a1165e385cc8239fab7c036fb2cd8093728cbd85097b284d7b99249a2"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_aarch64.whl", hash = "sha256:aea440a510e14e818e67bfc4027880e2fb500c2ccb20ab21c7a7c8b5b4703d75"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_i686.whl", hash = "sha256:6974f52a02321b36847cd19d1b8e381bf39939c21efd6ee2fc13a28b0d99348c"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_ppc64le.whl", hash = "sha256:a7e53012d2853a07a4a79c00643832161a910674a893d296c9f1259859a289d2"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:d7702622a8b40c49bffb46e1e3ba2e81268d5c04a34f460978c6b5517a34dd52"},
+    {file = "Brotli-1.1.0-cp36-cp36m-win32.whl", hash = "sha256:a599669fd7c47233438a56936988a2478685e74854088ef5293802123b5b2460"},
+    {file = "Brotli-1.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d143fd47fad1db3d7c27a1b1d66162e855b5d50a89666af46e1679c496e8e579"},
+    {file = "Brotli-1.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:11d00ed0a83fa22d29bc6b64ef636c4552ebafcef57154b4ddd132f5638fbd1c"},
+    {file = "Brotli-1.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f733d788519c7e3e71f0855c96618720f5d3d60c3cb829d8bbb722dddce37985"},
+    {file = "Brotli-1.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:929811df5462e182b13920da56c6e0284af407d1de637d8e536c5cd00a7daf60"},
+    {file = "Brotli-1.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0b63b949ff929fbc2d6d3ce0e924c9b93c9785d877a21a1b678877ffbbc4423a"},
+    {file = "Brotli-1.1.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:d192f0f30804e55db0d0e0a35d83a9fead0e9a359a9ed0285dbacea60cc10a84"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f296c40e23065d0d6650c4aefe7470d2a25fffda489bcc3eb66083f3ac9f6643"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:919e32f147ae93a09fe064d77d5ebf4e35502a8df75c29fb05788528e330fe74"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:23032ae55523cc7bccb4f6a0bf368cd25ad9bcdcc1990b64a647e7bbcce9cb5b"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:224e57f6eac61cc449f498cc5f0e1725ba2071a3d4f48d5d9dffba42db196438"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:cb1dac1770878ade83f2ccdf7d25e494f05c9165f5246b46a621cc849341dc01"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:3ee8a80d67a4334482d9712b8e83ca6b1d9bc7e351931252ebef5d8f7335a547"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:5e55da2c8724191e5b557f8e18943b1b4839b8efc3ef60d65985bcf6f587dd38"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:d342778ef319e1026af243ed0a07c97acf3bad33b9f29e7ae6a1f68fd083e90c"},
+    {file = "Brotli-1.1.0-cp37-cp37m-win32.whl", hash = "sha256:587ca6d3cef6e4e868102672d3bd9dc9698c309ba56d41c2b9c85bbb903cdb95"},
+    {file = "Brotli-1.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2954c1c23f81c2eaf0b0717d9380bd348578a94161a65b3a2afc62c86467dd68"},
+    {file = "Brotli-1.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:efa8b278894b14d6da122a72fefcebc28445f2d3f880ac59d46c90f4c13be9a3"},
+    {file = "Brotli-1.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:03d20af184290887bdea3f0f78c4f737d126c74dc2f3ccadf07e54ceca3bf208"},
+    {file = "Brotli-1.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6172447e1b368dcbc458925e5ddaf9113477b0ed542df258d84fa28fc45ceea7"},
+    {file = "Brotli-1.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a743e5a28af5f70f9c080380a5f908d4d21d40e8f0e0c8901604d15cfa9ba751"},
+    {file = "Brotli-1.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0541e747cce78e24ea12d69176f6a7ddb690e62c425e01d31cc065e69ce55b48"},
+    {file = "Brotli-1.1.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cdbc1fc1bc0bff1cef838eafe581b55bfbffaed4ed0318b724d0b71d4d377619"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:890b5a14ce214389b2cc36ce82f3093f96f4cc730c1cffdbefff77a7c71f2a97"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ab4fbee0b2d9098c74f3057b2bc055a8bd92ccf02f65944a241b4349229185a"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:141bd4d93984070e097521ed07e2575b46f817d08f9fa42b16b9b5f27b5ac088"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fce1473f3ccc4187f75b4690cfc922628aed4d3dd013d047f95a9b3919a86596"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d2b35ca2c7f81d173d2fadc2f4f31e88cc5f7a39ae5b6db5513cf3383b0e0ec7"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:af6fa6817889314555aede9a919612b23739395ce767fe7fcbea9a80bf140fe5"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:2feb1d960f760a575dbc5ab3b1c00504b24caaf6986e2dc2b01c09c87866a943"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4410f84b33374409552ac9b6903507cdb31cd30d2501fc5ca13d18f73548444a"},
+    {file = "Brotli-1.1.0-cp38-cp38-win32.whl", hash = "sha256:db85ecf4e609a48f4b29055f1e144231b90edc90af7481aa731ba2d059226b1b"},
+    {file = "Brotli-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3d7954194c36e304e1523f55d7042c59dc53ec20dd4e9ea9d151f1b62b4415c0"},
+    {file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5fb2ce4b8045c78ebbc7b8f3c15062e435d47e7393cc57c25115cfd49883747a"},
+    {file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7905193081db9bfa73b1219140b3d315831cbff0d8941f22da695832f0dd188f"},
+    {file = "Brotli-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a77def80806c421b4b0af06f45d65a136e7ac0bdca3c09d9e2ea4e515367c7e9"},
+    {file = "Brotli-1.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dadd1314583ec0bf2d1379f7008ad627cd6336625d6679cf2f8e67081b83acf"},
+    {file = "Brotli-1.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:901032ff242d479a0efa956d853d16875d42157f98951c0230f69e69f9c09bac"},
+    {file = "Brotli-1.1.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:22fc2a8549ffe699bfba2256ab2ed0421a7b8fadff114a3d201794e45a9ff578"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ae15b066e5ad21366600ebec29a7ccbc86812ed267e4b28e860b8ca16a2bc474"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:949f3b7c29912693cee0afcf09acd6ebc04c57af949d9bf77d6101ebb61e388c"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:89f4988c7203739d48c6f806f1e87a1d96e0806d44f0fba61dba81392c9e474d"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:de6551e370ef19f8de1807d0a9aa2cdfdce2e85ce88b122fe9f6b2b076837e59"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0737ddb3068957cf1b054899b0883830bb1fec522ec76b1098f9b6e0f02d9419"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:4f3607b129417e111e30637af1b56f24f7a49e64763253bbc275c75fa887d4b2"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:6c6e0c425f22c1c719c42670d561ad682f7bfeeef918edea971a79ac5252437f"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:494994f807ba0b92092a163a0a283961369a65f6cbe01e8891132b7a320e61eb"},
+    {file = "Brotli-1.1.0-cp39-cp39-win32.whl", hash = "sha256:f0d8a7a6b5983c2496e364b969f0e526647a06b075d034f3297dc66f3b360c64"},
+    {file = "Brotli-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdad5b9014d83ca68c25d2e9444e28e967ef16e80f6b436918c700c117a85467"},
+    {file = "Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724"},
+]
+
+[[package]]
+name = "brotlicffi"
+version = "1.1.0.0"
+description = "Python CFFI bindings to the Brotli library"
+optional = false
+python-versions = ">=3.7"
+groups = ["tutorials"]
+markers = "platform_python_implementation == \"PyPy\""
+files = [
+    {file = "brotlicffi-1.1.0.0-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:9b7ae6bd1a3f0df532b6d67ff674099a96d22bc0948955cb338488c31bfb8851"},
+    {file = "brotlicffi-1.1.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19ffc919fa4fc6ace69286e0a23b3789b4219058313cf9b45625016bf7ff996b"},
+    {file = "brotlicffi-1.1.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9feb210d932ffe7798ee62e6145d3a757eb6233aa9a4e7db78dd3690d7755814"},
+    {file = "brotlicffi-1.1.0.0-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84763dbdef5dd5c24b75597a77e1b30c66604725707565188ba54bab4f114820"},
+    {file = "brotlicffi-1.1.0.0-cp37-abi3-win32.whl", hash = "sha256:1b12b50e07c3911e1efa3a8971543e7648100713d4e0971b13631cce22c587eb"},
+    {file = "brotlicffi-1.1.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:994a4f0681bb6c6c3b0925530a1926b7a189d878e6e5e38fae8efa47c5d9c613"},
+    {file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2e4aeb0bd2540cb91b069dbdd54d458da8c4334ceaf2d25df2f4af576d6766ca"},
+    {file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b7b0033b0d37bb33009fb2fef73310e432e76f688af76c156b3594389d81391"},
+    {file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54a07bb2374a1eba8ebb52b6fafffa2afd3c4df85ddd38fcc0511f2bb387c2a8"},
+    {file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7901a7dc4b88f1c1475de59ae9be59799db1007b7d059817948d8e4f12e24e35"},
+    {file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ce01c7316aebc7fce59da734286148b1d1b9455f89cf2c8a4dfce7d41db55c2d"},
+    {file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:246f1d1a90279bb6069de3de8d75a8856e073b8ff0b09dcca18ccc14cec85979"},
+    {file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc4bc5d82bc56ebd8b514fb8350cfac4627d6b0743382e46d033976a5f80fab6"},
+    {file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37c26ecb14386a44b118ce36e546ce307f4810bc9598a6e6cb4f7fca725ae7e6"},
+    {file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca72968ae4eaf6470498d5c2887073f7efe3b1e7d7ec8be11a06a79cc810e990"},
+    {file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:add0de5b9ad9e9aa293c3aa4e9deb2b61e99ad6c1634e01d01d98c03e6a354cc"},
+    {file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9b6068e0f3769992d6b622a1cd2e7835eae3cf8d9da123d7f51ca9c1e9c333e5"},
+    {file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8557a8559509b61e65083f8782329188a250102372576093c88930c875a69838"},
+    {file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a7ae37e5d79c5bdfb5b4b99f2715a6035e6c5bf538c3746abc8e26694f92f33"},
+    {file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391151ec86bb1c683835980f4816272a87eaddc46bb91cbf44f62228b84d8cca"},
+    {file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:2f3711be9290f0453de8eed5275d93d286abe26b08ab4a35d7452caa1fef532f"},
+    {file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a807d760763e398bbf2c6394ae9da5815901aa93ee0a37bca5efe78d4ee3171"},
+    {file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa8ca0623b26c94fccc3a1fdd895be1743b838f3917300506d04aa3346fd2a14"},
+    {file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3de0cf28a53a3238b252aca9fed1593e9d36c1d116748013339f0949bfc84112"},
+    {file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6be5ec0e88a4925c91f3dea2bb0013b3a2accda6f77238f76a34a1ea532a1cb0"},
+    {file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d9eb71bb1085d996244439154387266fd23d6ad37161f6f52f1cd41dd95a3808"},
+    {file = "brotlicffi-1.1.0.0.tar.gz", hash = "sha256:b77827a689905143f87915310b93b273ab17888fd43ef350d4832c4a71083c13"},
+]
+
+[package.dependencies]
+cffi = ">=1.0.0"
+
+[[package]]
+name = "certifi"
+version = "2025.1.31"
+description = "Python package for providing Mozilla's CA Bundle."
+optional = false
+python-versions = ">=3.6"
+groups = ["tutorials"]
+files = [
+    {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"},
+    {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"},
+]
+
+[[package]]
+name = "cffi"
+version = "1.17.1"
+description = "Foreign Function Interface for Python calling C code."
+optional = false
+python-versions = ">=3.8"
+groups = ["tutorials"]
+markers = "platform_python_implementation == \"PyPy\""
+files = [
+    {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"},
+    {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17"},
+    {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8"},
+    {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e"},
+    {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be"},
+    {file = "cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c"},
+    {file = "cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15"},
+    {file = "cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401"},
+    {file = "cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d"},
+    {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6"},
+    {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f"},
+    {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b"},
+    {file = "cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655"},
+    {file = "cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0"},
+    {file = "cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4"},
+    {file = "cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93"},
+    {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3"},
+    {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8"},
+    {file = "cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65"},
+    {file = "cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903"},
+    {file = "cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e"},
+    {file = "cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd"},
+    {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed"},
+    {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9"},
+    {file = "cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d"},
+    {file = "cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a"},
+    {file = "cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1"},
+    {file = "cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8"},
+    {file = "cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1"},
+    {file = "cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16"},
+    {file = "cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3"},
+    {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595"},
+    {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a"},
+    {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e"},
+    {file = "cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7"},
+    {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"},
+    {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"},
+]
+
+[package.dependencies]
+pycparser = "*"
+
+[[package]]
+name = "charset-normalizer"
+version = "3.4.1"
+description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
+optional = false
+python-versions = ">=3.7"
+groups = ["tutorials"]
+files = [
+    {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"},
+    {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"},
+    {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037"},
+    {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f"},
+    {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a"},
+    {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a"},
+    {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247"},
+    {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408"},
+    {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb"},
+    {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d"},
+    {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807"},
+    {file = "charset_normalizer-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f"},
+    {file = "charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f"},
+    {file = "charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125"},
+    {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1"},
+    {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3"},
+    {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd"},
+    {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00"},
+    {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12"},
+    {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77"},
+    {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146"},
+    {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd"},
+    {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6"},
+    {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8"},
+    {file = "charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b"},
+    {file = "charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76"},
+    {file = "charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545"},
+    {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7"},
+    {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757"},
+    {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa"},
+    {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d"},
+    {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616"},
+    {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b"},
+    {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d"},
+    {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a"},
+    {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9"},
+    {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1"},
+    {file = "charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35"},
+    {file = "charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f"},
+    {file = "charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda"},
+    {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313"},
+    {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9"},
+    {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b"},
+    {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11"},
+    {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f"},
+    {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd"},
+    {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2"},
+    {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886"},
+    {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601"},
+    {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd"},
+    {file = "charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407"},
+    {file = "charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971"},
+    {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089"},
+    {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d"},
+    {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf"},
+    {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e"},
+    {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a"},
+    {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd"},
+    {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534"},
+    {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e"},
+    {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e"},
+    {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa"},
+    {file = "charset_normalizer-3.4.1-cp37-cp37m-win32.whl", hash = "sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487"},
+    {file = "charset_normalizer-3.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d"},
+    {file = "charset_normalizer-3.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c"},
+    {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9"},
+    {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8"},
+    {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6"},
+    {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c"},
+    {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a"},
+    {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd"},
+    {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd"},
+    {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824"},
+    {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca"},
+    {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b"},
+    {file = "charset_normalizer-3.4.1-cp38-cp38-win32.whl", hash = "sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e"},
+    {file = "charset_normalizer-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4"},
+    {file = "charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41"},
+    {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f"},
+    {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2"},
+    {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770"},
+    {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4"},
+    {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537"},
+    {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496"},
+    {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78"},
+    {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7"},
+    {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6"},
+    {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294"},
+    {file = "charset_normalizer-3.4.1-cp39-cp39-win32.whl", hash = "sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5"},
+    {file = "charset_normalizer-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765"},
+    {file = "charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85"},
+    {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"},
+]
+
 [[package]]
 name = "click"
 version = "8.1.8"
 description = "Composable command line interface toolkit"
 optional = false
 python-versions = ">=3.7"
-groups = ["dev"]
+groups = ["dev", "tutorials"]
 files = [
     {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"},
     {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"},
@@ -68,7 +440,7 @@ version = "0.4.6"
 description = "Cross-platform colored terminal text."
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
-groups = ["dev"]
+groups = ["dev", "tutorials"]
 markers = "platform_system == \"Windows\""
 files = [
     {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
@@ -92,6 +464,77 @@ mccabe = ">=0.7.0,<0.8.0"
 pycodestyle = ">=2.12.0,<2.13.0"
 pyflakes = ">=3.2.0,<3.3.0"
 
+[[package]]
+name = "idna"
+version = "3.10"
+description = "Internationalized Domain Names in Applications (IDNA)"
+optional = false
+python-versions = ">=3.6"
+groups = ["tutorials"]
+files = [
+    {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"},
+    {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"},
+]
+
+[package.extras]
+all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
+
+[[package]]
+name = "inflate64"
+version = "1.0.1"
+description = "deflate64 compression/decompression library"
+optional = false
+python-versions = ">=3.9"
+groups = ["tutorials"]
+files = [
+    {file = "inflate64-1.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5122a188995e47a735ab969edc9129d42bbd97b993df5a3f0819b87205ce81b4"},
+    {file = "inflate64-1.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:975ed694c680e46a5c0bb872380a9c9da271a91f9c0646561c58e8f3714347d4"},
+    {file = "inflate64-1.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8bcaf445d9cda5f7358e0c2b78144641560f8ce9e3e4351099754c49d26a34e8"},
+    {file = "inflate64-1.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:daede09baba24117279109b30fdf935195e91957e31b995b86f8dd01711376ee"},
+    {file = "inflate64-1.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0df40eaaba4fb8379d5c4fa5f56cc24741c4f1a91d4aef66438207473351ceaa"},
+    {file = "inflate64-1.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ef90855ff63d53c8fd3bfbf85b5280b22f82b9ab2e21a7eee45b8a19d9866c42"},
+    {file = "inflate64-1.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5daa4566c0b009c9ab8a6bf18ce407d14f5dbbb0d3068f3a43af939a17e117a7"},
+    {file = "inflate64-1.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:d58a360b59685561a8feacee743479a9d7cc17c8d210aa1f2ae221f2513973cb"},
+    {file = "inflate64-1.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:31198c5f156806cee05b69b149074042b7b7d39274ff4c259b898e617294ac17"},
+    {file = "inflate64-1.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4ab693bb1cd92573a997f8fe7b90a2ec1e17a507884598f5640656257b95ef49"},
+    {file = "inflate64-1.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:95b6a60e305e6e759e37d6c36691fcb87678922c56b3ddc2df06cd56e04f41f6"},
+    {file = "inflate64-1.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:711ef889bdb3b3b296881d1e49830a3a896938fba7033c4287f1aed9b9a20111"},
+    {file = "inflate64-1.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3178495970ecb5c6a32167a8b57fdeef3bf4e2843eaf8f2d8f816f523741e36"},
+    {file = "inflate64-1.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e8373b7feedf10236eb56d21598a19a3eb51077c3702d0ce3456b827374025e1"},
+    {file = "inflate64-1.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cf026d5c885f2d2bbf233e9a0c8c6d046ec727e2467024ffe0ac76b5be308258"},
+    {file = "inflate64-1.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:3aa7489241e6c6f6d34b9561efdf06031c35305b864267a5b8f406abcd3e85c5"},
+    {file = "inflate64-1.0.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b81b3d373190ecd82901f42afd90b7127e9bdef341032a94db381c750ed3ddb2"},
+    {file = "inflate64-1.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dbfddc5dac975227c20997f0ac515917a15421767c6bff0c209ac6ff9d7b17cc"},
+    {file = "inflate64-1.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2adeabe79cc2f90bca832673520c8cbad7370f86353e151293add7ca529bed34"},
+    {file = "inflate64-1.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b235c97a05dbe2f92f0f057426e4d05a449e1fccf8e9aa88075ea9c6a06a182"},
+    {file = "inflate64-1.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19b74e30734dca5f1c83ca07074e1f25bf7b63f4a5ee7e074d9a4cb05af65cd5"},
+    {file = "inflate64-1.0.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b298feb85204b5ef148ccf807744c836fffed7c1ed3ec8bc9b4e323a03163291"},
+    {file = "inflate64-1.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8a4c75241bc442267f79b8242135f2ded29405662c44b9353d34fbd4fa6e56b3"},
+    {file = "inflate64-1.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:7b210392f0830ab27371e36478592f47757f5ea6c09ddb96e2125847b309eb5e"},
+    {file = "inflate64-1.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8dd58aa1adc4f98bf9b52baffa8f2ddf589e071a90db2f2bec9024328d4608cf"},
+    {file = "inflate64-1.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c108be2b87e88c966570f84f839eb37f489b45dc3fa3046dc228327af6e921bb"},
+    {file = "inflate64-1.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:63971c6b096c0d533c0e38b4257f5a7748501a8bc04d00cf239bd06467888703"},
+    {file = "inflate64-1.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d0077edb6b1cabfa2223b71a4a725e5755148f551a7a396c7d5698e45fb8828"},
+    {file = "inflate64-1.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f05b5f2a6f1bf2f70e9c20d997261711cbc1ae477379662b05b36911da60a67"},
+    {file = "inflate64-1.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f3c7402165f7e15789caa0787e5a349465d9a454105d0c3a0ccf2e9cdfb8117"},
+    {file = "inflate64-1.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:39bced168822e4bf2f545d1b6dbeded6db01c32629d9e4549ef2cd1604a12e1b"},
+    {file = "inflate64-1.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:70bb6a22d300d8ca25c26bc60afb5662c5a96d97a801962874d0461568512789"},
+    {file = "inflate64-1.0.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f3d5ea758358a1cc50f9e8e41de2134e9b5c5ca8bbcd88d1cd135d0e953d0fa8"},
+    {file = "inflate64-1.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8fa102c834314c3d7edbf249d1be0bce5d12a9e122228a7ac3f861ee82c3dc5c"},
+    {file = "inflate64-1.0.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c2ae56a34e6cc2a712418ac82332e5d550ef8599e0ffb64c19b86d63a7df0c5"},
+    {file = "inflate64-1.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9808ae50b5db661770992566e51e648cac286c32bd80892b151e7b1eca81afe8"},
+    {file = "inflate64-1.0.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:04b2788c6a26e1e525f53cc3d8c58782d41f18bef8d2a34a3d58beaaf0bfdd3b"},
+    {file = "inflate64-1.0.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67fd5b1f9e433b0abab8cb91f4da94d16223a5241008268a57f4729fdbfc4dbc"},
+    {file = "inflate64-1.0.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6f3b00c17ae365e82fc3d48ff9a7a566820a6c8c55b4e16c6cfbcbd46505a72"},
+    {file = "inflate64-1.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:91c0c1d41c1655fb0189630baaa894a3b778d77062bb90ca11db878422948395"},
+    {file = "inflate64-1.0.1.tar.gz", hash = "sha256:3b1c83c22651b5942b35829df526e89602e494192bf021e0d7d0b600e76c429d"},
+]
+
+[package.extras]
+check = ["check-manifest", "flake8", "flake8-black", "flake8-deprecated", "flake8-isort", "mypy (>=1.10.0)", "mypy_extensions (>=0.4.1)", "pygments", "readme-renderer", "twine"]
+docs = ["docutils", "sphinx (>=5.0)"]
+test = ["pytest"]
+
 [[package]]
 name = "isort"
 version = "6.0.0"
@@ -120,6 +563,23 @@ files = [
     {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
 ]
 
+[[package]]
+name = "multivolumefile"
+version = "0.2.3"
+description = "multi volume file wrapper library"
+optional = false
+python-versions = ">=3.6"
+groups = ["tutorials"]
+files = [
+    {file = "multivolumefile-0.2.3-py3-none-any.whl", hash = "sha256:237f4353b60af1703087cf7725755a1f6fcaeeea48421e1896940cd1c920d678"},
+    {file = "multivolumefile-0.2.3.tar.gz", hash = "sha256:a0648d0aafbc96e59198d5c17e9acad7eb531abea51035d08ce8060dcad709d6"},
+]
+
+[package.extras]
+check = ["check-manifest", "flake8", "flake8-black", "isort (>=5.0.3)", "pygments", "readme-renderer", "twine"]
+test = ["coverage[toml] (>=5.2)", "coveralls (>=2.1.1)", "hypothesis", "pyannotate", "pytest", "pytest-cov"]
+type = ["mypy", "mypy-extensions"]
+
 [[package]]
 name = "mypy-extensions"
 version = "1.0.0"
@@ -241,6 +701,31 @@ docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-a
 test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"]
 type = ["mypy (>=1.11.2)"]
 
+[[package]]
+name = "psutil"
+version = "7.0.0"
+description = "Cross-platform lib for process and system monitoring in Python.  NOTE: the syntax of this script MUST be kept compatible with Python 2.7."
+optional = false
+python-versions = ">=3.6"
+groups = ["tutorials"]
+markers = "sys_platform != \"cygwin\""
+files = [
+    {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"},
+    {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"},
+    {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91"},
+    {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34"},
+    {file = "psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993"},
+    {file = "psutil-7.0.0-cp36-cp36m-win32.whl", hash = "sha256:84df4eb63e16849689f76b1ffcb36db7b8de703d1bc1fe41773db487621b6c17"},
+    {file = "psutil-7.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:1e744154a6580bc968a0195fd25e80432d3afec619daf145b9e5ba16cc1d688e"},
+    {file = "psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99"},
+    {file = "psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553"},
+    {file = "psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456"},
+]
+
+[package.extras]
+dev = ["abi3audit", "black (==24.10.0)", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest", "pytest-cov", "pytest-xdist", "requests", "rstcheck", "ruff", "setuptools", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "vulture", "wheel"]
+test = ["pytest", "pytest-xdist", "setuptools"]
+
 [[package]]
 name = "py4j"
 version = "0.10.9.7"
@@ -253,6 +738,92 @@ files = [
     {file = "py4j-0.10.9.7.tar.gz", hash = "sha256:0b6e5315bb3ada5cf62ac651d107bb2ebc02def3dee9d9548e3baac644ea8dbb"},
 ]
 
+[[package]]
+name = "py7zr"
+version = "0.22.0"
+description = "Pure python 7-zip library"
+optional = false
+python-versions = ">=3.8"
+groups = ["tutorials"]
+files = [
+    {file = "py7zr-0.22.0-py3-none-any.whl", hash = "sha256:993b951b313500697d71113da2681386589b7b74f12e48ba13cc12beca79d078"},
+    {file = "py7zr-0.22.0.tar.gz", hash = "sha256:c6c7aea5913535184003b73938490f9a4d8418598e533f9ca991d3b8e45a139e"},
+]
+
+[package.dependencies]
+brotli = {version = ">=1.1.0", markers = "platform_python_implementation == \"CPython\""}
+brotlicffi = {version = ">=1.1.0.0", markers = "platform_python_implementation == \"PyPy\""}
+inflate64 = ">=1.0.0,<1.1.0"
+multivolumefile = ">=0.2.3"
+psutil = {version = "*", markers = "sys_platform != \"cygwin\""}
+pybcj = ">=1.0.0,<1.1.0"
+pycryptodomex = ">=3.16.0"
+pyppmd = ">=1.1.0,<1.2.0"
+pyzstd = ">=0.15.9"
+texttable = "*"
+
+[package.extras]
+check = ["black (>=23.1.0)", "check-manifest", "flake8 (<8)", "flake8-black (>=0.3.6)", "flake8-deprecated", "flake8-isort", "isort (>=5.0.3)", "lxml", "mypy (>=0.940)", "mypy-extensions (>=0.4.1)", "pygments", "readme-renderer", "twine", "types-psutil"]
+debug = ["pytest", "pytest-leaks", "pytest-profiling"]
+docs = ["docutils", "sphinx (>=5.0)", "sphinx-a4doc", "sphinx-py3doc-enhanced-theme"]
+test = ["coverage[toml] (>=5.2)", "coveralls (>=2.1.1)", "py-cpuinfo", "pytest", "pytest-benchmark", "pytest-cov", "pytest-remotedata", "pytest-timeout"]
+test-compat = ["libarchive-c"]
+
+[[package]]
+name = "pybcj"
+version = "1.0.3"
+description = "bcj filter library"
+optional = false
+python-versions = ">=3.9"
+groups = ["tutorials"]
+files = [
+    {file = "pybcj-1.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0bd8afeacf9173af091a08783aa9111500f5619ce0ae486bffb5ee4d08a331b4"},
+    {file = "pybcj-1.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fc81d3c941485e7d3c2812834ca005849fe91a624977ed5227658cf952d19696"},
+    {file = "pybcj-1.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f01b75621452578ccd48a79819bc95ddac41535e16aa163ea1d86b14258afa00"},
+    {file = "pybcj-1.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e08431845702173d50d66cbbd169969d7b7cf67992f5fb7bc27a8c67e19d3d1f"},
+    {file = "pybcj-1.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:476f3c815b85e563d13238c4310b9cb47aefd0c51ac1b33312e41fcd079ea94f"},
+    {file = "pybcj-1.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:97bfd712bfce0d58099a02acc05b15b1d1aa3e6edf4dd8e018f43349182ffa3f"},
+    {file = "pybcj-1.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4d1374806cde777bc6e371f79c7f3acfb2b0906a418e04cf5331866a321633c3"},
+    {file = "pybcj-1.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9245039e0fc87921f702133c019722e333934e61f1c90408f16618d585ff88ec"},
+    {file = "pybcj-1.0.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ae30aa62deff1ba40e4f13ef6964cf083ece541dbfb3ec3731c1fc58cc218b7d"},
+    {file = "pybcj-1.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6639f5443bc696a981a502c37e1393398a7182d61820eb39ee6d122076b6ad8c"},
+    {file = "pybcj-1.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4502c5afa2a41e569b94527bbb46185ee1a378a4fb3e9d7806ad10e892ecdf58"},
+    {file = "pybcj-1.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4ff48aaadd8fd91ac02557eec225ce7c1a3b627a6832d6cb723469891b3b242"},
+    {file = "pybcj-1.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62668bd0a1aedaa3b779615cf129d9469fd709ab8d944aa07aad68dc189de349"},
+    {file = "pybcj-1.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8af60d5eeed32fd1a9f6a2a11eef47cb7ebd80fe9853e709a2c1d9e29108cdf2"},
+    {file = "pybcj-1.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:68e1bd1b0836e216cce3d9a33795501dfc956c61ff52768737e26286e65a3771"},
+    {file = "pybcj-1.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:05738d44a987422e21f4ee15023a8c4f38a5509fdf6e6f6dfaaf43ca05cef7db"},
+    {file = "pybcj-1.0.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c68a3fe847f22a8393fe71b1b16450b6b9e8ef36faa36d0c03759f58740f6eff"},
+    {file = "pybcj-1.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:17f610ede3a766c0ff1869a4dd7750db78d39e4bfc9997f6bef050fe794c051b"},
+    {file = "pybcj-1.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:15f776925a4d6f69b344cde9035fc8f1fd02f1f2a4ccb76f4047406c0ea4241d"},
+    {file = "pybcj-1.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bdda28e0a20214c7f0e7de9e260122b9197106231249bf07a5ca5b84a5d38a1"},
+    {file = "pybcj-1.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:764cba20166fcd9ff580f4d877f17807be057da7d1234caaf54fd5fd5c591387"},
+    {file = "pybcj-1.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:97cf7f788560c3283a8afe3de585abb849bb1338d007e53fb6441d6ccd202e0a"},
+    {file = "pybcj-1.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:26d201f773d17d5e8a88785f00fa73a6647e080d933e75ddeb33da7f0baff657"},
+    {file = "pybcj-1.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:990047ac176317d42e7059b3cd357ff7c7201f3e3f08b35d083b2004d066cd39"},
+    {file = "pybcj-1.0.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:3bbbf22687c9f6c57cc9b605a3a60937230843ff1b5560e2a42133fd4dd5dc73"},
+    {file = "pybcj-1.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e0a75d5ec3fa40af865f93f29e613d93fb67dc016fc60e64a4b3a4621076fecd"},
+    {file = "pybcj-1.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:631bcdea0d47ae562f118f8404fb6ef5813eb2dcfbcc53c7b9ac6bc5d4c2ef32"},
+    {file = "pybcj-1.0.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75c9430a10e69fbea336668944c0f4a9979e0bb3ab5de820315025c157baa2ae"},
+    {file = "pybcj-1.0.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5221652a9c656f6b27fda389cc4888354a287d3e0f6ea6d5b70718b6d9ec110d"},
+    {file = "pybcj-1.0.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:f6a6c3a776aa9b579c51768d2c727d3912cd8e1c2add61898dc6794b269e7ab3"},
+    {file = "pybcj-1.0.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:cb50276bd804f58690571c13e2e6eb26eca6c4a39a611591e2202136dca1b7a5"},
+    {file = "pybcj-1.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:623a4eef080f5cb0405ce19f90fa9824e2477f4a85d8b888e613cf7f146b84d1"},
+    {file = "pybcj-1.0.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:47d2a0f33dfd55dfa961502922d2b0f090857585b321f838f1c2510de4e66a9a"},
+    {file = "pybcj-1.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cf8ac15785412aa6924818fb86e250ae15e8238b7db7d410e28d3ae0743cdbd3"},
+    {file = "pybcj-1.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:de02d2933fef5b26d845d2e002996c5e22c710af5b5dfc930285dff09db885cf"},
+    {file = "pybcj-1.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40a0f542dba6d079d702c1c129cc8cdc0f20bf2c5cb45defba8d5ac8e2d691a1"},
+    {file = "pybcj-1.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ace508285fd4788845a208dd00f1c7af8e68dd222cf7797ae525562a2eb22bab"},
+    {file = "pybcj-1.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6da2b0c120a415fa5620b76110bab487de20f8a108756499fd4df9c92fc10098"},
+    {file = "pybcj-1.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9c6347f1e2c78cf2584fddebe6fb9dc036b75020887facec1bab149fd6056c6"},
+    {file = "pybcj-1.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:be309c0fbf06b1e8cd1c40b24dd621271b5fb5d9fe7a0becb40ed64ac92ff50b"},
+    {file = "pybcj-1.0.3.tar.gz", hash = "sha256:b8873637f0be00ceaa372d0fb81693604b4bbc8decdb2b1ae5f9b84d196788d9"},
+]
+
+[package.extras]
+check = ["check-manifest", "flake8 (<5)", "flake8-black", "flake8-colors", "flake8-isort", "flake8-pyi", "flake8-typing-imports", "mypy (>=1.10.0)", "pygments", "readme-renderer"]
+test = ["coverage[toml] (>=5.2)", "hypothesis", "pytest (>=6.0)", "pytest-cov"]
+
 [[package]]
 name = "pycodestyle"
 version = "2.12.1"
@@ -265,6 +836,61 @@ files = [
     {file = "pycodestyle-2.12.1.tar.gz", hash = "sha256:6838eae08bbce4f6accd5d5572075c63626a15ee3e6f842df996bf62f6d73521"},
 ]
 
+[[package]]
+name = "pycparser"
+version = "2.22"
+description = "C parser in Python"
+optional = false
+python-versions = ">=3.8"
+groups = ["tutorials"]
+markers = "platform_python_implementation == \"PyPy\""
+files = [
+    {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"},
+    {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"},
+]
+
+[[package]]
+name = "pycryptodomex"
+version = "3.21.0"
+description = "Cryptographic library for Python"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
+groups = ["tutorials"]
+files = [
+    {file = "pycryptodomex-3.21.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:dbeb84a399373df84a69e0919c1d733b89e049752426041deeb30d68e9867822"},
+    {file = "pycryptodomex-3.21.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:a192fb46c95489beba9c3f002ed7d93979423d1b2a53eab8771dbb1339eb3ddd"},
+    {file = "pycryptodomex-3.21.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:1233443f19d278c72c4daae749872a4af3787a813e05c3561c73ab0c153c7b0f"},
+    {file = "pycryptodomex-3.21.0-cp27-cp27m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbb07f88e277162b8bfca7134b34f18b400d84eac7375ce73117f865e3c80d4c"},
+    {file = "pycryptodomex-3.21.0-cp27-cp27m-musllinux_1_1_aarch64.whl", hash = "sha256:e859e53d983b7fe18cb8f1b0e29d991a5c93be2c8dd25db7db1fe3bd3617f6f9"},
+    {file = "pycryptodomex-3.21.0-cp27-cp27m-win32.whl", hash = "sha256:ef046b2e6c425647971b51424f0f88d8a2e0a2a63d3531817968c42078895c00"},
+    {file = "pycryptodomex-3.21.0-cp27-cp27m-win_amd64.whl", hash = "sha256:da76ebf6650323eae7236b54b1b1f0e57c16483be6e3c1ebf901d4ada47563b6"},
+    {file = "pycryptodomex-3.21.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:c07e64867a54f7e93186a55bec08a18b7302e7bee1b02fd84c6089ec215e723a"},
+    {file = "pycryptodomex-3.21.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:56435c7124dd0ce0c8bdd99c52e5d183a0ca7fdcd06c5d5509423843f487dd0b"},
+    {file = "pycryptodomex-3.21.0-cp27-cp27mu-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:65d275e3f866cf6fe891411be9c1454fb58809ccc5de6d3770654c47197acd65"},
+    {file = "pycryptodomex-3.21.0-cp27-cp27mu-musllinux_1_1_aarch64.whl", hash = "sha256:5241bdb53bcf32a9568770a6584774b1b8109342bd033398e4ff2da052123832"},
+    {file = "pycryptodomex-3.21.0-cp36-abi3-macosx_10_9_universal2.whl", hash = "sha256:34325b84c8b380675fd2320d0649cdcbc9cf1e0d1526edbe8fce43ed858cdc7e"},
+    {file = "pycryptodomex-3.21.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:103c133d6cd832ae7266feb0a65b69e3a5e4dbbd6f3a3ae3211a557fd653f516"},
+    {file = "pycryptodomex-3.21.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77ac2ea80bcb4b4e1c6a596734c775a1615d23e31794967416afc14852a639d3"},
+    {file = "pycryptodomex-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9aa0cf13a1a1128b3e964dc667e5fe5c6235f7d7cfb0277213f0e2a783837cc2"},
+    {file = "pycryptodomex-3.21.0-cp36-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:46eb1f0c8d309da63a2064c28de54e5e614ad17b7e2f88df0faef58ce192fc7b"},
+    {file = "pycryptodomex-3.21.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:cc7e111e66c274b0df5f4efa679eb31e23c7545d702333dfd2df10ab02c2a2ce"},
+    {file = "pycryptodomex-3.21.0-cp36-abi3-musllinux_1_2_i686.whl", hash = "sha256:770d630a5c46605ec83393feaa73a9635a60e55b112e1fb0c3cea84c2897aa0a"},
+    {file = "pycryptodomex-3.21.0-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:52e23a0a6e61691134aa8c8beba89de420602541afaae70f66e16060fdcd677e"},
+    {file = "pycryptodomex-3.21.0-cp36-abi3-win32.whl", hash = "sha256:a3d77919e6ff56d89aada1bd009b727b874d464cb0e2e3f00a49f7d2e709d76e"},
+    {file = "pycryptodomex-3.21.0-cp36-abi3-win_amd64.whl", hash = "sha256:b0e9765f93fe4890f39875e6c90c96cb341767833cfa767f41b490b506fa9ec0"},
+    {file = "pycryptodomex-3.21.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:feaecdce4e5c0045e7a287de0c4351284391fe170729aa9182f6bd967631b3a8"},
+    {file = "pycryptodomex-3.21.0-pp27-pypy_73-win32.whl", hash = "sha256:365aa5a66d52fd1f9e0530ea97f392c48c409c2f01ff8b9a39c73ed6f527d36c"},
+    {file = "pycryptodomex-3.21.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3efddfc50ac0ca143364042324046800c126a1d63816d532f2e19e6f2d8c0c31"},
+    {file = "pycryptodomex-3.21.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0df2608682db8279a9ebbaf05a72f62a321433522ed0e499bc486a6889b96bf3"},
+    {file = "pycryptodomex-3.21.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5823d03e904ea3e53aebd6799d6b8ec63b7675b5d2f4a4bd5e3adcb512d03b37"},
+    {file = "pycryptodomex-3.21.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:27e84eeff24250ffec32722334749ac2a57a5fd60332cd6a0680090e7c42877e"},
+    {file = "pycryptodomex-3.21.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8ef436cdeea794015263853311f84c1ff0341b98fc7908e8a70595a68cefd971"},
+    {file = "pycryptodomex-3.21.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a1058e6dfe827f4209c5cae466e67610bcd0d66f2f037465daa2a29d92d952b"},
+    {file = "pycryptodomex-3.21.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9ba09a5b407cbb3bcb325221e346a140605714b5e880741dc9a1e9ecf1688d42"},
+    {file = "pycryptodomex-3.21.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:8a9d8342cf22b74a746e3c6c9453cb0cfbb55943410e3a2619bd9164b48dc9d9"},
+    {file = "pycryptodomex-3.21.0.tar.gz", hash = "sha256:222d0bd05381dd25c32dd6065c071ebf084212ab79bab4599ba9e6a3e0009e6c"},
+]
+
 [[package]]
 name = "pyflakes"
 version = "3.2.0"
@@ -277,6 +903,77 @@ files = [
     {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"},
 ]
 
+[[package]]
+name = "pyppmd"
+version = "1.1.1"
+description = "PPMd compression/decompression library"
+optional = false
+python-versions = ">=3.9"
+groups = ["tutorials"]
+files = [
+    {file = "pyppmd-1.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:406b184132c69e3f60ea9621b69eaa0c5494e83f82c307b3acce7b86a4f8f888"},
+    {file = "pyppmd-1.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c2cf003bb184adf306e1ac1828107307927737dde63474715ba16462e266cbef"},
+    {file = "pyppmd-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:71c8fd0ecc8d4760e852dd6df19d1a827427cb9e6c9e568cbf5edba7d860c514"},
+    {file = "pyppmd-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6b5edee08b66ad6c39fd4d34a7ef4cfeb4b69fd6d68957e59cd2db674611a9e"},
+    {file = "pyppmd-1.1.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e95bd23eb1543ab3149f24fe02f6dd2695023326027a4b989fb2c6dba256e75e"},
+    {file = "pyppmd-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e633ee4cc19d0c71b3898092c3c4cc20a10bd5e6197229fffac29d68ad5d83b8"},
+    {file = "pyppmd-1.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ecaafe2807ef557f0c49b8476a4fa04091b43866072fbcf31b3ceb01a96c9168"},
+    {file = "pyppmd-1.1.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c182fccff60ae8f24f28f5145c36a60708b5b041a25d36b67f23c44923552fa4"},
+    {file = "pyppmd-1.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:70c93d19efe67cdac3e7fa2d4e171650a2c4f90127a9781b25e496a43f12fbbc"},
+    {file = "pyppmd-1.1.1-cp310-cp310-win32.whl", hash = "sha256:57c75856920a210ed72b553885af7bc06eddfd30ff26b62a3a63cb8f86f3d217"},
+    {file = "pyppmd-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:d5293f10dc8c1d571b780e0d54426d3d858c19bbd8cb0fe972dcea3906acd05c"},
+    {file = "pyppmd-1.1.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:753c5297c91c059443caef33bccbffb10764221739d218046981638aeb9bc5f2"},
+    {file = "pyppmd-1.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9b5a73da09de480a94793c9064876af14a01be117de872737935ac447b7cde3c"},
+    {file = "pyppmd-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:89c6febb7114dea02a061143d78d04751a945dfcadff77560e9a3d3c7583c24b"},
+    {file = "pyppmd-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0001e467c35e35e6076a8c32ed9074aa45833615ee16115de9282d5c0985a1d8"},
+    {file = "pyppmd-1.1.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c76820db25596afc859336ba06c01c9be0ff326480beec9c699fd378a546a77f"},
+    {file = "pyppmd-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b67f0a228f8c58750a21ba667c170ae957283e08fd580857f13cb686334e5b3e"},
+    {file = "pyppmd-1.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b18f24c14f0b0f1757a42c458ae7b6fd7aa0bce8147ac1016a9c134068c1ccc2"},
+    {file = "pyppmd-1.1.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c9e43729161cc3b6ad5b04b16bae7665d3c0cc803de047d8a979aa9232a4f94a"},
+    {file = "pyppmd-1.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fe057d254528b4eeebe2800baefde47d6af679bae184d3793c13a06f794df442"},
+    {file = "pyppmd-1.1.1-cp311-cp311-win32.whl", hash = "sha256:faa51240493a5c53c9b544c99722f70303eea702742bf90f3c3064144342da4a"},
+    {file = "pyppmd-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:62486f544d6957e1381147e3961eee647b7f4421795be4fb4f1e29d52aee6cb5"},
+    {file = "pyppmd-1.1.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9877ef273e2c0efdec740855e28004a708ada9012e0db6673df4bb6eba3b05e0"},
+    {file = "pyppmd-1.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f816a5cbccceced80e15335389eeeaf1b56a605fb7eebe135b1c85bd161e288c"},
+    {file = "pyppmd-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6bddabf8f2c6b991d15d6785e603d9d414ae4a791f131b1a729bb8a5d31133d1"},
+    {file = "pyppmd-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:855bc2b0d19c3fead5815d72dbe350b4f765334336cbf8bcb504d46edc9e9dd2"},
+    {file = "pyppmd-1.1.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a95b11b3717c083b912f0879678ba72f301bbdb9b69efed46dbc5df682aa3ce7"},
+    {file = "pyppmd-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38b645347b6ea217b0c58e8edac27473802868f152db520344ac8c7490981849"},
+    {file = "pyppmd-1.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f8f94b6222262def5b532f2b9716554ef249ad8411fd4da303596cc8c2e8eda1"},
+    {file = "pyppmd-1.1.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1c0306f69ceddf385ef689ebd0218325b7e523c48333d87157b37393466cfa1e"},
+    {file = "pyppmd-1.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4ba510457a56535522a660098399e3fa8722e4de55808d089c9d13435d87069"},
+    {file = "pyppmd-1.1.1-cp312-cp312-win32.whl", hash = "sha256:032f040a89fd8348109e8638f94311bd4c3c693fb4cad213ad06a37c203690b1"},
+    {file = "pyppmd-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:2be8cbd13dd59fad1a0ad38062809e28596f3673b77a799dfe82b287986265ed"},
+    {file = "pyppmd-1.1.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9458f972f090f3846fc5bea0a6f7363da773d3c4b2d4654f1d4ca3c11f6ecbfa"},
+    {file = "pyppmd-1.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:44811a9d958873d857ca81cebf7ba646a0952f8a7bbf8a60cf6ec5d002faa040"},
+    {file = "pyppmd-1.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a1b12460958885ca44e433986644009d0599b87a444f668ce3724a46ce588924"},
+    {file = "pyppmd-1.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:200c74f05b97b00f047cf60607914a0b50f80991f1fb3677f624a85aa79d9458"},
+    {file = "pyppmd-1.1.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ebe0d98a341b32f164e860059243e125398865cc0363b32ffc31f953460fe87"},
+    {file = "pyppmd-1.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf93e1e047a82f1e7e194fcf49da166d2b9d8dc98d7c0b5cd844dc4360d9c1f5"},
+    {file = "pyppmd-1.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f5b0b8c746bde378ae3b4df42a11fd8599ba3e5808dfea36e16d722b74bd0506"},
+    {file = "pyppmd-1.1.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bcdd5207b6c79887f25639632ca2623a399d8c54f567973e9ba474b5ebae2b1c"},
+    {file = "pyppmd-1.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7bfcca94e5452b6d54ac24a11c2402f6a193c331e5dc221c1f1df71773624374"},
+    {file = "pyppmd-1.1.1-cp39-cp39-win32.whl", hash = "sha256:18e99c074664f996f511bc6e87aab46bc4c75f5bd0157d3210292919be35e22c"},
+    {file = "pyppmd-1.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:b29788d5a0f8f39ea46a1255cd886daddf9c64ba9d4cb64677bc93bd3859ac0e"},
+    {file = "pyppmd-1.1.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:28648ef56793bf1ed0ff24728642f56fa39cb96ea161dec6ee2d26f97c0cdd28"},
+    {file = "pyppmd-1.1.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:427d6f9b9c011e032db9529b2a15773f2e2944ca490b67d5757f4af33bbda406"},
+    {file = "pyppmd-1.1.1-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34c7a07197a03656c1920fd88e05049c155a955c4de4b8b8a8e5fec19a97b45b"},
+    {file = "pyppmd-1.1.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1fea2eee28beca61165c4714dcd032de76af318553791107d308b4b08575ecc"},
+    {file = "pyppmd-1.1.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:04391e4f82c8c2c316ba60e480300ad1af37ec12bdb5c20f06b502030ff35975"},
+    {file = "pyppmd-1.1.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:cf08a354864c352a94e6e53733009baeab1e7c570010c4f5be226923ecfa09d1"},
+    {file = "pyppmd-1.1.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:334e5fe5d75764b87c591a16d2b2df6f9939e2ad114dacf98bb4b0e7c90911e9"},
+    {file = "pyppmd-1.1.1-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15d5928b25f04f5431585d17c835cd509a34e1c9f1416653db8d2815e97d4e20"},
+    {file = "pyppmd-1.1.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af06329796a4965788910ac40f1b012d2e173ede08456ceea0ec7fc4d2e69d62"},
+    {file = "pyppmd-1.1.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:4ccdd3751e432e71e02de96f16fc8824e4f4bfc47a8b470f0c7aae88dae4c666"},
+    {file = "pyppmd-1.1.1.tar.gz", hash = "sha256:f1a812f1e7628f4c26d05de340b91b72165d7b62778c27d322b82ce2e8ff00cb"},
+]
+
+[package.extras]
+check = ["check-manifest", "flake8", "flake8-black", "flake8-isort", "mypy (>=1.10.0)", "pygments", "readme-renderer"]
+docs = ["sphinx", "sphinx_rtd_theme"]
+fuzzer = ["atheris", "hypothesis"]
+test = ["coverage[toml] (>=5.2)", "hypothesis", "pytest (>=6.0)", "pytest-benchmark", "pytest-cov", "pytest-timeout"]
+
 [[package]]
 name = "pyspark"
 version = "3.5.4"
@@ -298,6 +995,133 @@ mllib = ["numpy (>=1.15,<2)"]
 pandas-on-spark = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
 sql = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
 
+[[package]]
+name = "pyzstd"
+version = "0.16.2"
+description = "Python bindings to Zstandard (zstd) compression library."
+optional = false
+python-versions = ">=3.5"
+groups = ["tutorials"]
+files = [
+    {file = "pyzstd-0.16.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:637376c8f8cbd0afe1cab613f8c75fd502bd1016bf79d10760a2d5a00905fe62"},
+    {file = "pyzstd-0.16.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3e7a7118cbcfa90ca2ddbf9890c7cb582052a9a8cf2b7e2c1bbaf544bee0f16a"},
+    {file = "pyzstd-0.16.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a74cb1ba05876179525144511eed3bd5a509b0ab2b10632c1215a85db0834dfd"},
+    {file = "pyzstd-0.16.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7c084dde218ffbf112e507e72cbf626b8f58ce9eb23eec129809e31037984662"},
+    {file = "pyzstd-0.16.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4646459ebd3d7a59ddbe9312f020bcf7cdd1f059a2ea07051258f7af87a0b31"},
+    {file = "pyzstd-0.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14bfc2833cc16d7657fc93259edeeaa793286e5031b86ca5dc861ba49b435fce"},
+    {file = "pyzstd-0.16.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f27d488f19e5bf27d1e8aa1ae72c6c0a910f1e1ffbdf3c763d02ab781295dd27"},
+    {file = "pyzstd-0.16.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:91e134ca968ff7dcfa8b7d433318f01d309b74ee87e0d2bcadc117c08e1c80db"},
+    {file = "pyzstd-0.16.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:6b5f64cd3963c58b8f886eb6139bb8d164b42a74f8a1bb95d49b4804f4592d61"},
+    {file = "pyzstd-0.16.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:0b4a8266871b9e0407f9fd8e8d077c3558cf124d174e6357b523d14f76971009"},
+    {file = "pyzstd-0.16.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:1bb19f7acac30727354c25125922aa59f44d82e0e6a751df17d0d93ff6a73853"},
+    {file = "pyzstd-0.16.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3008325b7368e794d66d4d98f2ee1d867ef5afd09fd388646ae02b25343c420d"},
+    {file = "pyzstd-0.16.2-cp310-cp310-win32.whl", hash = "sha256:66f2d5c0bbf5bf32c577aa006197b3525b80b59804450e2c32fbcc2d16e850fd"},
+    {file = "pyzstd-0.16.2-cp310-cp310-win_amd64.whl", hash = "sha256:5fe5f5459ebe1161095baa7a86d04ab625b35148f6c425df0347ed6c90a2fd58"},
+    {file = "pyzstd-0.16.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1c1bdbe7f01c7f37d5cd07be70e32a84010d7dfd6677920c0de04cf7d245b60d"},
+    {file = "pyzstd-0.16.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1882a3ceaaf9adc12212d587d150ec5e58cfa9a765463d803d739abbd3ac0f7a"},
+    {file = "pyzstd-0.16.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea46a8b9d60f6a6eba29facba54c0f0d70328586f7ef0da6f57edf7e43db0303"},
+    {file = "pyzstd-0.16.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d7865bc06589cdcecdede0deefe3da07809d5b7ad9044c224d7b2a0867256957"},
+    {file = "pyzstd-0.16.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:52f938a65b409c02eb825e8c77fc5ea54508b8fc44b5ce226db03011691ae8cc"},
+    {file = "pyzstd-0.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e97620d3f53a0282947304189deef7ca7f7d0d6dfe15033469dc1c33e779d5e5"},
+    {file = "pyzstd-0.16.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7c40e9983d017108670dc8df68ceef14c7c1cf2d19239213274783041d0e64c"},
+    {file = "pyzstd-0.16.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7cd4b3b2c6161066e4bde6af1cf78ed3acf5d731884dd13fdf31f1db10830080"},
+    {file = "pyzstd-0.16.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:454f31fd84175bb203c8c424f2255a343fa9bd103461a38d1bf50487c3b89508"},
+    {file = "pyzstd-0.16.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:5ef754a93743f08fb0386ce3596780bfba829311b49c8f4107af1a4bcc16935d"},
+    {file = "pyzstd-0.16.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:be81081db9166e10846934f0e3576a263cbe18d81eca06e6a5c23533f8ce0dc6"},
+    {file = "pyzstd-0.16.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:738bcb2fa1e5f1868986f5030955e64de53157fa1141d01f3a4daf07a1aaf644"},
+    {file = "pyzstd-0.16.2-cp311-cp311-win32.whl", hash = "sha256:0ea214c9b97046867d1657d55979021028d583704b30c481a9c165191b08d707"},
+    {file = "pyzstd-0.16.2-cp311-cp311-win_amd64.whl", hash = "sha256:c17c0fc02f0e75b0c7cd21f8eaf4c6ce4112333b447d93da1773a5f705b2c178"},
+    {file = "pyzstd-0.16.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d4081fd841a9efe9ded7290ee7502dbf042c4158b90edfadea3b8a072c8ec4e1"},
+    {file = "pyzstd-0.16.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fd3fa45d2aeb65367dd702806b2e779d13f1a3fa2d13d5ec777cfd09de6822de"},
+    {file = "pyzstd-0.16.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8b5f0d2c07994a5180d8259d51df6227a57098774bb0618423d7eb4a7303467"},
+    {file = "pyzstd-0.16.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60c9d25b15c7ae06ed5d516d096a0d8254f9bed4368b370a09cccf191eaab5cb"},
+    {file = "pyzstd-0.16.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:29acf31ce37254f6cad08deb24b9d9ba954f426fa08f8fae4ab4fdc51a03f4ae"},
+    {file = "pyzstd-0.16.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ec77612a17697a9f7cf6634ffcee616eba9b997712fdd896e77fd19ab3a0618"},
+    {file = "pyzstd-0.16.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:313ea4974be93be12c9a640ab40f0fc50a023178aae004a8901507b74f190173"},
+    {file = "pyzstd-0.16.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e91acdefc8c2c6c3b8d5b1b5fe837dce4e591ecb7c0a2a50186f552e57d11203"},
+    {file = "pyzstd-0.16.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:929bd91a403539e72b5b5cb97f725ac4acafe692ccf52f075e20cd9bf6e5493d"},
+    {file = "pyzstd-0.16.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:740837a379aa32d110911ebcbbc524f9a9b145355737527543a884bd8777ca4f"},
+    {file = "pyzstd-0.16.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:adfc0e80dd157e6d1e0b0112c8ecc4b58a7a23760bd9623d74122ef637cfbdb6"},
+    {file = "pyzstd-0.16.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:79b183beae1c080ad3dca39019e49b7785391947f9aab68893ad85d27828c6e7"},
+    {file = "pyzstd-0.16.2-cp312-cp312-win32.whl", hash = "sha256:b8d00631a3c466bc313847fab2a01f6b73b3165de0886fb03210e08567ae3a89"},
+    {file = "pyzstd-0.16.2-cp312-cp312-win_amd64.whl", hash = "sha256:c0d43764e9a60607f35d8cb3e60df772a678935ab0e02e2804d4147377f4942c"},
+    {file = "pyzstd-0.16.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3ae9ae7ad730562810912d7ecaf1fff5eaf4c726f4b4dfe04784ed5f06d7b91f"},
+    {file = "pyzstd-0.16.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2ce8d3c213f76a564420f3d0137066ac007ce9fb4e156b989835caef12b367a7"},
+    {file = "pyzstd-0.16.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2c14dac23c865e2d78cebd9087e148674b7154f633afd4709b4cd1520b99a61"},
+    {file = "pyzstd-0.16.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4527969d66a943e36ef374eda847e918077de032d58b5df84d98ffd717b6fa77"},
+    {file = "pyzstd-0.16.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd8256149b88e657e99f31e6d4b114c8ff2935951f1d8bb8e1fe501b224999c0"},
+    {file = "pyzstd-0.16.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5bd1f1822d65c9054bf36d35307bf8ed4aa2d2d6827431761a813628ff671b1d"},
+    {file = "pyzstd-0.16.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f6733f4d373ec9ad2c1976cf06f973a3324c1f9abe236d114d6bb91165a397d"},
+    {file = "pyzstd-0.16.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7bec165ab6524663f00b69bfefd13a46a69fed3015754abaf81b103ec73d92c6"},
+    {file = "pyzstd-0.16.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e4460fa6949aac6528a1ad0de8871079600b12b3ef4db49316306786a3598321"},
+    {file = "pyzstd-0.16.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:75df79ea0315c97d88337953a17daa44023dbf6389f8151903d371513f503e3c"},
+    {file = "pyzstd-0.16.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:93e1d45f4a196afb6f18682c79bdd5399277ead105b67f30b35c04c207966071"},
+    {file = "pyzstd-0.16.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:075e18b871f38a503b5d23e40a661adfc750bd4bd0bb8b208c1e290f3ceb8fa2"},
+    {file = "pyzstd-0.16.2-cp313-cp313-win32.whl", hash = "sha256:9e4295eb299f8d87e3487852bca033d30332033272a801ca8130e934475e07a9"},
+    {file = "pyzstd-0.16.2-cp313-cp313-win_amd64.whl", hash = "sha256:18deedc70f858f4cf574e59f305d2a0678e54db2751a33dba9f481f91bc71c28"},
+    {file = "pyzstd-0.16.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a9892b707ef52f599098b1e9528df0e7849c5ec01d3e8035fb0e67de4b464839"},
+    {file = "pyzstd-0.16.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4fbd647864341f3c174c4a6d7f20e6ea6b4be9d840fb900dc0faf0849561badc"},
+    {file = "pyzstd-0.16.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20ac2c15656cc6194c4fed1cb0e8159f9394d4ea1d58be755448743d2ec6c9c4"},
+    {file = "pyzstd-0.16.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b239fb9a20c1be3374b9a2bd183ba624fd22ad7a3f67738c0d80cda68b4ae1d3"},
+    {file = "pyzstd-0.16.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc52400412cdae2635e0978b8d6bcc0028cc638fdab2fd301f6d157675d26896"},
+    {file = "pyzstd-0.16.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b766a6aeb8dbb6c46e622e7a1aebfa9ab03838528273796941005a5ce7257b1"},
+    {file = "pyzstd-0.16.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd4b8676052f9d59579242bf3cfe5fd02532b6a9a93ab7737c118ae3b8509dc"},
+    {file = "pyzstd-0.16.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1c6c0a677aac7c0e3d2d2605d4d68ffa9893fdeeb2e071040eb7c8750969d463"},
+    {file = "pyzstd-0.16.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:15f9c2d612e7e2023d68d321d1b479846751f792af89141931d44e82ae391394"},
+    {file = "pyzstd-0.16.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:11740bff847aad23beef4085a1bb767d101895881fe891f0a911aa27d43c372c"},
+    {file = "pyzstd-0.16.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:b9067483ebe860e4130a03ee665b3d7be4ec1608b208e645d5e7eb3492379464"},
+    {file = "pyzstd-0.16.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:988f0ba19b14c2fe0afefc444ac1edfb2f497b7d7c3212b2f587504cc2ec804e"},
+    {file = "pyzstd-0.16.2-cp39-cp39-win32.whl", hash = "sha256:8855acb1c3e3829030b9e9e9973b19e2d70f33efb14ad5c474b4d086864c959c"},
+    {file = "pyzstd-0.16.2-cp39-cp39-win_amd64.whl", hash = "sha256:018e88378df5e76f5e1d8cf4416576603b6bc4a103cbc66bb593eaac54c758de"},
+    {file = "pyzstd-0.16.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4b631117b97a42ff6dfd0ffc885a92fff462d7c34766b28383c57b996f863338"},
+    {file = "pyzstd-0.16.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:56493a3fbe1b651a02102dd0902b0aa2377a732ff3544fb6fb3f114ca18db52f"},
+    {file = "pyzstd-0.16.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1eae9bdba4a1e5d3181331f403114ff5b8ce0f4b569f48eba2b9beb2deef1e4"},
+    {file = "pyzstd-0.16.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1be6972391c8aeecc7e61feb96ffc8e77a401bcba6ed994e7171330c45a1948"},
+    {file = "pyzstd-0.16.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:761439d687e3a5687c2ff5c6a1190e1601362a4a3e8c6c82ff89719d51d73e19"},
+    {file = "pyzstd-0.16.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f5fbdb8cf31b60b2dc586fecb9b73e2f172c21a0b320ed275f7b8d8a866d9003"},
+    {file = "pyzstd-0.16.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:183f26e34f9becf0f2db38be9c0bfb136753d228bcb47c06c69175901bea7776"},
+    {file = "pyzstd-0.16.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:88318b64b5205a67748148d6d244097fa6cf61fcea02ad3435511b9e7155ae16"},
+    {file = "pyzstd-0.16.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73142aa2571b6480136a1865ebda8257e09eabbc8bcd54b222202f6fa4febe1e"},
+    {file = "pyzstd-0.16.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d3f8877c29a97f1b1bba16f3d3ab01ad10ad3da7bad317aecf36aaf8848b37c"},
+    {file = "pyzstd-0.16.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1f25754562473ac7de856b8331ebd5964f5d85601045627a5f0bb0e4e899990"},
+    {file = "pyzstd-0.16.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:6ce17e84310080c55c02827ad9bb17893c00a845c8386a328b346f814aabd2c1"},
+    {file = "pyzstd-0.16.2.tar.gz", hash = "sha256:179c1a2ea1565abf09c5f2fd72f9ce7c54b2764cf7369e05c0bfd8f1f67f63d2"},
+]
+
+[[package]]
+name = "requests"
+version = "2.32.3"
+description = "Python HTTP for Humans."
+optional = false
+python-versions = ">=3.8"
+groups = ["tutorials"]
+files = [
+    {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"},
+    {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"},
+]
+
+[package.dependencies]
+certifi = ">=2017.4.17"
+charset-normalizer = ">=2,<4"
+idna = ">=2.5,<4"
+urllib3 = ">=1.21.1,<3"
+
+[package.extras]
+socks = ["PySocks (>=1.5.6,!=1.5.7)"]
+use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
+
+[[package]]
+name = "texttable"
+version = "1.7.0"
+description = "module to create simple ASCII tables"
+optional = false
+python-versions = "*"
+groups = ["tutorials"]
+files = [
+    {file = "texttable-1.7.0-py2.py3-none-any.whl", hash = "sha256:72227d592c82b3d7f672731ae73e4d1f88cd8e2ef5b075a7a7f01a23a3743917"},
+    {file = "texttable-1.7.0.tar.gz", hash = "sha256:2d2068fb55115807d3ac77a4ca68fa48803e84ebb0ee2340f858107a36522638"},
+]
+
 [[package]]
 name = "tomli"
 version = "2.2.1"
@@ -354,7 +1178,25 @@ files = [
     {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
 ]
 
+[[package]]
+name = "urllib3"
+version = "2.3.0"
+description = "HTTP library with thread-safe connection pooling, file post, and more."
+optional = false
+python-versions = ">=3.9"
+groups = ["tutorials"]
+files = [
+    {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"},
+    {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"},
+]
+
+[package.extras]
+brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
+h2 = ["h2 (>=4,<5)"]
+socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
+zstd = ["zstandard (>=0.18.0)"]
+
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.9 <3.13"
-content-hash = "52c129fee3e94e69edf727f219bc7582ddbfcedf6c43547a7f67a876051bd7c4"
+content-hash = "33ae7f96a3999d6822af7778f9b7878355d811534a4b5fec14d51ec29aa8dce2"
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 8c0c1ba05..36097e2c9 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -34,6 +34,11 @@ black = "^25.1.0"
 flake8 = "^7.1.1"
 isort = "^6.0.0"
 
+[tool.poetry.group.tutorials.dependencies]
+py7zr = "^0.22.0"
+requests = "^2.32.3"
+click = "^8.1.8"
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"

From c0d6d7b58175a04d08f81de7d574979ea2af4610 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Mon, 17 Feb 2025 10:57:04 -0800
Subject: [PATCH 44/53] Make motif.py execute in whole again

---
 python/graphframes/tutorials/motif.py | 52 ++++++++++++++-------------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/python/graphframes/tutorials/motif.py b/python/graphframes/tutorials/motif.py
index 4a2189c56..2f5eb030c 100644
--- a/python/graphframes/tutorials/motif.py
+++ b/python/graphframes/tutorials/motif.py
@@ -16,7 +16,8 @@
 spark: SparkSession = (
     SparkSession.builder.appName("Stack Overflow Motif Analysis")
     # Lets the Id:(Stack Overflow int) and id:(GraphFrames ULID) coexist
-    .config("spark.sql.caseSensitive", True).getOrCreate()
+    .config("spark.sql.caseSensitive", True)
+    .getOrCreate()
 )
 sc: SparkContext = spark.sparkContext
 sc.setCheckpointDir("/tmp/graphframes-checkpoints")
@@ -25,8 +26,9 @@
 STACKEXCHANGE_SITE = "stats.meta.stackexchange.com"
 BASE_PATH = f"python/graphframes/tutorials/data/{STACKEXCHANGE_SITE}"
 
+
 #
-# Load the nodes and edges from disk, repartition, checkpoint [plan got long for some reason] and cache.
+# Load the nodes and edges from disk, repartition, checkpoint [plan got long for some reason] and cache. 
 #
 
 # We created these in stackexchange.py from Stack Exchange data dump XML files
@@ -45,7 +47,8 @@
 
 # What kind of nodes we do we have to work with?
 node_counts = (
-    nodes_df.select("id", F.col("Type").alias("Node Type"))
+    nodes_df
+    .select("id", F.col("Type").alias("Node Type"))
     .groupBy("Node Type")
     .count()
     .orderBy(F.col("count").desc())
@@ -56,7 +59,8 @@
 
 # What kind of edges do we have to work with?
 edge_counts = (
-    edges_df.select("src", "dst", F.col("relationship").alias("Edge Type"))
+    edges_df
+    .select("src", "dst", F.col("relationship").alias("Edge Type"))
     .groupBy("Edge Type")
     .count()
     .orderBy(F.col("count").desc())
@@ -65,7 +69,7 @@
 )
 edge_counts.show()
 
-g = GraphFrame(nodes_df, edges_df)
+g = GraphFrame(nodes_df, edges_df)  
 
 g.vertices.show(10)
 print(f"Node columns: {g.vertices.columns}")
@@ -166,28 +170,25 @@
 )
 graphlet_count_df.show()
 
-graphlet_count_df.orderBy(
-    [
-        "A_Type",
-        "(a)-[e1]->(b)",
-        "B_Type",
-        "(b)-[e2]->(c)",
-        "C_Type",
-        "(d)-[e3]->(c)",
-        "D_Type",
-    ],
-    ascending=False,
-).show(104)
+graphlet_count_df.orderBy([
+    "A_Type",
+    "(a)-[e1]->(b)",
+    "B_Type",
+    "(b)-[e2]->(c)",
+    "C_Type",
+    "(d)-[e3]->(c)",
+    "D_Type",
+], ascending=False).show(104)
 
 # A user answers an answer that answers a question that links to an answer.
 linked_vote_paths = paths.filter(
-    (F.col("a.Type") == "Vote")
-    & (F.col("e1.relationship") == "CastFor")
-    & (F.col("b.Type") == "Question")
-    & (F.col("e2.relationship") == "Links")
-    & (F.col("c.Type") == "Question")
-    & (F.col("e3.relationship") == "CastFor")
-    & (F.col("d.Type") == "Vote")
+    (F.col("a.Type") == "Vote") &
+    (F.col("e1.relationship") == "CastFor") &
+    (F.col("b.Type") == "Question") &
+    (F.col("e2.relationship") == "Links") &
+    (F.col("c.Type") == "Question") &
+    (F.col("e3.relationship") == "CastFor") &
+    (F.col("d.Type") == "Vote")
 )
 
 # Sanity check the count - it should match the table above
@@ -197,7 +198,8 @@
 c_vote_counts = linked_vote_paths.select("c", "d").distinct().groupBy("c").count()
 
 linked_vote_counts = (
-    linked_vote_paths.filter((F.col("a.VoteTypeId") == 2) & (F.col("d.VoteTypeId") == 2))
+    linked_vote_paths
+    .filter((F.col("a.VoteTypeId") == 2) & (F.col("d.VoteTypeId") == 2))
     .select("b", "c")
     .join(b_vote_counts, on="b", how="inner")
     .withColumnRenamed("count", "b_count")

From 5bb4c26b101b524193076e2870a5f5894e0a9c16 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Mon, 17 Feb 2025 11:55:50 -0800
Subject: [PATCH 45/53] Minor isort format and cleanup of download.py

---
 python/graphframes/tutorials/download.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/python/graphframes/tutorials/download.py b/python/graphframes/tutorials/download.py
index 154d84c14..e81eff8b9 100755
--- a/python/graphframes/tutorials/download.py
+++ b/python/graphframes/tutorials/download.py
@@ -1,14 +1,21 @@
 #!/usr/bin/env python
 
+"""Download and decompress the Stack Exchange data dump from the Internet Archive."""
+
 import os
+
 import click
-import requests
 import py7zr
+import requests  # type: ignore
 
 
 @click.command()
 @click.argument("subdomain")
-@click.option("--data-dir", default="python/graphframes/tutorials/data", help="Directory to store downloaded files")
+@click.option(
+    "--data-dir",
+    default="python/graphframes/tutorials/data",
+    help="Directory to store downloaded files",
+)
 @click.option(
     "--extract/--no-extract", default=True, help="Whether to extract the archive after download"
 )

From 99e6a4d14e6eb7cdc2c001ebefc1c3312ff43ced Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Mon, 17 Feb 2025 11:56:13 -0800
Subject: [PATCH 46/53] Minor isort format and cleanup of utils.py

---
 python/graphframes/tutorials/utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/graphframes/tutorials/utils.py b/python/graphframes/tutorials/utils.py
index 54ef40f8b..46db14d96 100644
--- a/python/graphframes/tutorials/utils.py
+++ b/python/graphframes/tutorials/utils.py
@@ -1,7 +1,10 @@
+"""Utilities for Network Motif Finding Tutorial"""
+
 from pyspark.sql import DataFrame
-from graphframes import GraphFrame
 from pyspark.sql import functions as F
 
+from graphframes import GraphFrame
+
 
 def three_edge_count(paths: DataFrame) -> DataFrame:
     """three_edge_count View the counts of the different types of 3-node graphlets in the graph.

From 662e197960a424c1f58c151b663c46d9d63da6be Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Mon, 17 Feb 2025 11:57:40 -0800
Subject: [PATCH 47/53] Removed case sensitivity from the script - that was
 confusing people who just pasted or tried to run the code without a new
 SparkSession.

---
 python/graphframes/tutorials/stackexchange.py | 48 ++++++++++++-------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/python/graphframes/tutorials/stackexchange.py b/python/graphframes/tutorials/stackexchange.py
index c52f323bb..5e029746e 100644
--- a/python/graphframes/tutorials/stackexchange.py
+++ b/python/graphframes/tutorials/stackexchange.py
@@ -1,4 +1,4 @@
-# Build a Graph out of the Stack Exchange Data Dump XML files
+"""Build a Graph out of the Stack Exchange Data Dump XML files."""
 
 #
 # Interactive Usage: pyspark --packages com.databricks:spark-xml_2.12:0.18.0
@@ -47,11 +47,9 @@ def split_tags(tags: str) -> List[str]:
 # Initialize a SparkSession with case sensitivity
 #
 
-spark: SparkSession = (
-    SparkSession.builder.appName("Stack Exchange Graph Builder")
-    # Lets the Id:(Stack Overflow int) and id:(GraphFrames UUID) coexist
-    .config("spark.sql.caseSensitive", True).getOrCreate()
-)
+spark: SparkSession = SparkSession.builder.appName("Stack Exchange Graph Builder").getOrCreate()
+sc = spark.sparkContext
+sc.setCheckpointDir("/tmp/graphframes-checkpoints")
 
 print("Loading data for stats.meta.stackexchange.com ...")
 
@@ -296,12 +294,23 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
 )
 print(f"Total distinct nodes: {nodes_df.count():,}")
 
-# Now add a unique ID field
+# Now add a unique lowercase 'id' field - standard for GraphFrames - moving the original...
+# Stack Exchange Id to StackId
+nodes_df = nodes_df.withColumnRenamed("Id", "StackId").drop("Id")
+
+# Update the column list...
+if "Id" in all_column_names:
+    all_column_names.remove("Id")
+all_column_names += ["StackId"]
+all_column_names = sorted(all_column_names)
+
+# Add the UUID 'id' field for GraphFrames. It will go in edges as 'src' and 'dst'
 nodes_df = nodes_df.withColumn("id", F.expr("uuid()")).select("id", *all_column_names)
 
 # Now create posts - combined questions and answers for things that can apply to them both
 posts_df = questions_df.unionByName(answers_df).cache()
 
+
 #
 # Store the nodes to disk, reload and cache
 #
@@ -361,12 +370,12 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
 
 src_vote_df: DataFrame = votes_df.select(
     F.col("id").alias("src"),
-    F.col("Id").alias("VoteId"),
+    F.col("StackId").alias("VoteId"),
     # Everything has all the fields - should build from base records but need UUIDs
     F.col("PostId").alias("VotePostId"),
 )
 cast_for_edge_df: DataFrame = src_vote_df.join(
-    posts_df, on=src_vote_df.VotePostId == posts_df.Id, how="inner"
+    posts_df, on=src_vote_df.VotePostId == posts_df.StackId, how="inner"
 ).select(
     # 'src' comes from the votes' 'id'
     "src",
@@ -378,6 +387,7 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
 print(f"Total CastFor edges: {cast_for_edge_df.count():,}")
 print(f"Percentage of linked votes: {cast_for_edge_df.count() / votes_df.count():.2%}\n")
 
+
 #
 # Create a [User]--Asks-->[Question] edge
 #
@@ -388,7 +398,7 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     F.lit("Asks").alias("relationship"),
 )
 user_asks_edges_df: DataFrame = questions_asked_df.join(
-    users_df, on=questions_asked_df.QuestionUserId == users_df.Id, how="inner"
+    users_df, on=questions_asked_df.QuestionUserId == users_df.StackId, how="inner"
 ).select(
     # 'src' comes from the users' 'id'
     F.col("id").alias("src"),
@@ -402,6 +412,7 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     f"Percentage of asked questions linked to users: {user_asks_edges_df.count() / questions_df.count():.2%}\n"
 )
 
+
 #
 # Create a [User]--Posts-->[Answer] edge.
 #
@@ -412,7 +423,7 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     F.lit("Posts").alias("relationship"),
 )
 user_answers_edges_df = user_answers_df.join(
-    users_df, on=user_answers_df.AnswerUserId == users_df.Id, how="inner"
+    users_df, on=user_answers_df.AnswerUserId == users_df.StackId, how="inner"
 ).select(
     # 'src' comes from the users' 'id'
     F.col("id").alias("src"),
@@ -426,17 +437,18 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     f"Percentage of answers linked to users: {user_answers_edges_df.count() / answers_df.count():.2%}\n"
 )
 
+
 #
 # Create a [Answer]--Answers-->[Question] edge
 #
 
 src_answers_df: DataFrame = answers_df.select(
     F.col("id").alias("src"),
-    F.col("Id").alias("AnswerId"),
+    F.col("StackId").alias("AnswerId"),
     F.col("ParentId").alias("AnswerParentId"),
 )
 question_answers_edges_df: DataFrame = src_answers_df.join(
-    posts_df, on=src_answers_df.AnswerParentId == questions_df.Id, how="inner"
+    posts_df, on=src_answers_df.AnswerParentId == questions_df.StackId, how="inner"
 ).select(
     # 'src' comes from the answers' 'id'
     "src",
@@ -450,6 +462,7 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     f"Percentage of linked answers: {question_answers_edges_df.count() / answers_df.count():.2%}\n"
 )
 
+
 #
 # Create a [Tag]--Tags-->[Post] edge... remember a Post is a Question or Answer
 #
@@ -472,6 +485,7 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
 print(f"Total Tags edges: {tags_edge_df.count():,}")
 print(f"Percentage of linked tags: {tags_edge_df.count() / posts_df.count():.2%}\n")
 
+
 #
 # Create a [User]--Earns-->[Badge] edge
 #
@@ -482,7 +496,7 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     F.lit("Earns").alias("relationship"),
 )
 earns_edges_df = earns_edges_df.join(
-    users_df, on=earns_edges_df.BadgeUserId == users_df.Id, how="inner"
+    users_df, on=earns_edges_df.BadgeUserId == users_df.StackId, how="inner"
 ).select(
     # 'src' comes from the users' 'id'
     F.col("id").alias("src"),
@@ -494,6 +508,7 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
 print(f"Total Earns edges: {earns_edges_df.count():,}")
 print(f"Percentage of earned badges: {earns_edges_df.count() / badges_df.count():.2%}\n")
 
+
 #
 # Create a [Post]--Links-->[Post] edge... remember a Post is a Question or Answer
 # Also a   [Post]--Duplicates-->[Post] edge... remember a Post is a Question or Answer
@@ -505,7 +520,7 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     "LinkType",
 )
 links_src_edge_df: DataFrame = trim_links_df.join(
-    posts_df.drop("LinkType"), on=trim_links_df.SrcPostId == posts_df.Id, how="inner"
+    posts_df.drop("LinkType"), on=trim_links_df.SrcPostId == posts_df.StackId, how="inner"
 ).select(
     # 'dst' comes from the posts' 'id'
     F.col("id").alias("src"),
@@ -513,7 +528,7 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     "LinkType",
 )
 raw_links_edge_df = links_src_edge_df.join(
-    posts_df.drop("LinkType"), on=links_src_edge_df.DstPostId == posts_df.Id, how="inner"
+    posts_df.drop("LinkType"), on=links_src_edge_df.DstPostId == posts_df.StackId, how="inner"
 ).select(
     "src",
     # 'src' comes from the posts' 'id'
@@ -557,6 +572,7 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     "count", F.format_number(F.col("count"), 0)
 ).show()
 
+
 # +------------+------+
 # |relationship| count|
 # +------------+------+

From beaa35d60be2a8635e3f2743b3543631875cadcb Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Mon, 17 Feb 2025 11:58:29 -0800
Subject: [PATCH 48/53] motif.py now matches tutorial code, runs and handles
 case insensitivity.

---
 python/graphframes/tutorials/motif.py | 57 ++++++++++++---------------
 1 file changed, 26 insertions(+), 31 deletions(-)

diff --git a/python/graphframes/tutorials/motif.py b/python/graphframes/tutorials/motif.py
index 2f5eb030c..a4a82953a 100644
--- a/python/graphframes/tutorials/motif.py
+++ b/python/graphframes/tutorials/motif.py
@@ -1,4 +1,4 @@
-# Demonstrate GraphFrames network motif finding capabilities
+"""Demonstrate GraphFrames network motif finding capabilities. Code from the Network Motif Finding Tutorial."""
 
 #
 # Interactive Usage: pyspark --packages graphframes:graphframes:0.8.4-spark3.5-s_2.12
@@ -13,12 +13,7 @@
 from graphframes import GraphFrame
 
 # Initialize a SparkSession
-spark: SparkSession = (
-    SparkSession.builder.appName("Stack Overflow Motif Analysis")
-    # Lets the Id:(Stack Overflow int) and id:(GraphFrames ULID) coexist
-    .config("spark.sql.caseSensitive", True)
-    .getOrCreate()
-)
+spark: SparkSession = SparkSession.builder.appName("Stack Overflow Motif Analysis").getOrCreate()
 sc: SparkContext = spark.sparkContext
 sc.setCheckpointDir("/tmp/graphframes-checkpoints")
 
@@ -28,7 +23,7 @@
 
 
 #
-# Load the nodes and edges from disk, repartition, checkpoint [plan got long for some reason] and cache. 
+# Load the nodes and edges from disk, repartition, checkpoint [plan got long for some reason] and cache.
 #
 
 # We created these in stackexchange.py from Stack Exchange data dump XML files
@@ -47,8 +42,7 @@
 
 # What kind of nodes we do we have to work with?
 node_counts = (
-    nodes_df
-    .select("id", F.col("Type").alias("Node Type"))
+    nodes_df.select("id", F.col("Type").alias("Node Type"))
     .groupBy("Node Type")
     .count()
     .orderBy(F.col("count").desc())
@@ -59,8 +53,7 @@
 
 # What kind of edges do we have to work with?
 edge_counts = (
-    edges_df
-    .select("src", "dst", F.col("relationship").alias("Edge Type"))
+    edges_df.select("src", "dst", F.col("relationship").alias("Edge Type"))
     .groupBy("Edge Type")
     .count()
     .orderBy(F.col("count").desc())
@@ -69,7 +62,7 @@
 )
 edge_counts.show()
 
-g = GraphFrame(nodes_df, edges_df)  
+g = GraphFrame(nodes_df, edges_df)
 
 g.vertices.show(10)
 print(f"Node columns: {g.vertices.columns}")
@@ -170,25 +163,28 @@
 )
 graphlet_count_df.show()
 
-graphlet_count_df.orderBy([
-    "A_Type",
-    "(a)-[e1]->(b)",
-    "B_Type",
-    "(b)-[e2]->(c)",
-    "C_Type",
-    "(d)-[e3]->(c)",
-    "D_Type",
-], ascending=False).show(104)
+graphlet_count_df.orderBy(
+    [
+        "A_Type",
+        "(a)-[e1]->(b)",
+        "B_Type",
+        "(b)-[e2]->(c)",
+        "C_Type",
+        "(d)-[e3]->(c)",
+        "D_Type",
+    ],
+    ascending=False,
+).show(104)
 
 # A user answers an answer that answers a question that links to an answer.
 linked_vote_paths = paths.filter(
-    (F.col("a.Type") == "Vote") &
-    (F.col("e1.relationship") == "CastFor") &
-    (F.col("b.Type") == "Question") &
-    (F.col("e2.relationship") == "Links") &
-    (F.col("c.Type") == "Question") &
-    (F.col("e3.relationship") == "CastFor") &
-    (F.col("d.Type") == "Vote")
+    (F.col("a.Type") == "Vote")
+    & (F.col("e1.relationship") == "CastFor")
+    & (F.col("b.Type") == "Question")
+    & (F.col("e2.relationship") == "Links")
+    & (F.col("c.Type") == "Question")
+    & (F.col("e3.relationship") == "CastFor")
+    & (F.col("d.Type") == "Vote")
 )
 
 # Sanity check the count - it should match the table above
@@ -198,8 +194,7 @@
 c_vote_counts = linked_vote_paths.select("c", "d").distinct().groupBy("c").count()
 
 linked_vote_counts = (
-    linked_vote_paths
-    .filter((F.col("a.VoteTypeId") == 2) & (F.col("d.VoteTypeId") == 2))
+    linked_vote_paths.filter((F.col("a.VoteTypeId") == 2) & (F.col("d.VoteTypeId") == 2))
     .select("b", "c")
     .join(b_vote_counts, on="b", how="inner")
     .withColumnRenamed("count", "b_count")

From ef19784b9dd1befdab3d422fadf660139291f9b8 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Fri, 21 Feb 2025 11:11:31 +0100
Subject: [PATCH 49/53] Set up a 'graphframes stackexchange' command.

---
 python/graphframes/console.py            | 19 +++++++++++++++++++
 python/graphframes/tutorials/download.py |  4 ++--
 python/pyproject.toml                    |  6 ++++++
 3 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 python/graphframes/console.py

diff --git a/python/graphframes/console.py b/python/graphframes/console.py
new file mode 100644
index 000000000..d2b38d28b
--- /dev/null
+++ b/python/graphframes/console.py
@@ -0,0 +1,19 @@
+import click
+from graphframes.tutorials import download
+
+
+@click.group()
+def cli():
+    """GraphFrames CLI: a collection of commands for graphframes."""
+    pass
+
+
+cli.add_command(download.stackexchange)
+
+
+def main():
+    cli()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/python/graphframes/tutorials/download.py b/python/graphframes/tutorials/download.py
index e81eff8b9..049b1fa15 100755
--- a/python/graphframes/tutorials/download.py
+++ b/python/graphframes/tutorials/download.py
@@ -19,7 +19,7 @@
 @click.option(
     "--extract/--no-extract", default=True, help="Whether to extract the archive after download"
 )
-def download_stackexchange(subdomain: str, data_dir: str, extract: bool) -> None:
+def stackexchange(subdomain: str, data_dir: str, extract: bool) -> None:
     """Download Stack Exchange archive for a given SUBDOMAIN.
 
     Example: python/graphframes/tutorials/download.py stats.meta
@@ -68,4 +68,4 @@ def download_stackexchange(subdomain: str, data_dir: str, extract: bool) -> None
 
 
 if __name__ == "__main__":
-    download_stackexchange()
+    stackexchange()
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 36097e2c9..819d2bbdd 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -39,6 +39,9 @@ py7zr = "^0.22.0"
 requests = "^2.32.3"
 click = "^8.1.8"
 
+[tool.poetry.scripts]
+graphframes = "graphframes.console:main"
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
@@ -48,6 +51,9 @@ line-length = 100
 target-version = ["py39"]
 include = ["graphframes"]
 
+[tool.flake8]
+max-line-length = 100
+
 [tool.isort]
 profile = "black"
 src_paths = ["graphframes"]

From 4400cb4335a9237363ee033c40e44cbb7b3041c0 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Fri, 21 Feb 2025 11:13:29 +0100
Subject: [PATCH 50/53] Make graphframes.tutorials.motif use its own checkpoint
 dir, set directly via SparkSession.sparkContext. Use click.echo instead of print

---
 python/graphframes/tutorials/motif.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/python/graphframes/tutorials/motif.py b/python/graphframes/tutorials/motif.py
index a4a82953a..59691946a 100644
--- a/python/graphframes/tutorials/motif.py
+++ b/python/graphframes/tutorials/motif.py
@@ -6,16 +6,15 @@
 # Batch Usage: spark-submit --packages graphframes:graphframes:0.8.4-spark3.5-s_2.12 python/graphframes/tutorials/motif.py
 #
 
+import click
 import pyspark.sql.functions as F
-from pyspark import SparkContext
 from pyspark.sql import DataFrame, SparkSession
 
 from graphframes import GraphFrame
 
 # Initialize a SparkSession
 spark: SparkSession = SparkSession.builder.appName("Stack Overflow Motif Analysis").getOrCreate()
-sc: SparkContext = spark.sparkContext
-sc.setCheckpointDir("/tmp/graphframes-checkpoints")
+spark.sparkContext.setCheckpointDir("/tmp/graphframes-checkpoints/motif")
 
 # Change me if you download a different stackexchange site
 STACKEXCHANGE_SITE = "stats.meta.stackexchange.com"
@@ -65,7 +64,7 @@
 g = GraphFrame(nodes_df, edges_df)
 
 g.vertices.show(10)
-print(f"Node columns: {g.vertices.columns}")
+click.echo(f"Node columns: {g.vertices.columns}")
 
 g.edges.sample(0.0001).show(10)
 
@@ -82,7 +81,7 @@
 assert (
     edge_count == valid_edge_count
 ), f"Edge count {edge_count} != valid edge count {valid_edge_count}"
-print(f"Edge count: {edge_count:,} == Valid edge count: {valid_edge_count:,}")
+click.echo(f"Edge count: {edge_count:,} == Valid edge count: {valid_edge_count:,}")
 
 # G4: Continuous Triangles
 paths = g.find("(a)-[e1]->(b); (b)-[e2]->(c); (c)-[e3]->(a)")

From d549c566c7a500d4b16319851e88f8ffbd4df61e Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Fri, 21 Feb 2025 11:23:19 +0100
Subject: [PATCH 51/53] Use spark.sparkContext.setCheckpointDir directly
 instead of instantiating a SparkContext. print-->click.echo

---
 python/graphframes/tutorials/stackexchange.py | 63 +++++++++----------
 1 file changed, 31 insertions(+), 32 deletions(-)

diff --git a/python/graphframes/tutorials/stackexchange.py b/python/graphframes/tutorials/stackexchange.py
index 5e029746e..72185c446 100644
--- a/python/graphframes/tutorials/stackexchange.py
+++ b/python/graphframes/tutorials/stackexchange.py
@@ -5,10 +5,10 @@
 #
 # Batch Usage: spark-submit --packages com.databricks:spark-xml_2.12:0.18.0 python/graphframes/tutorials/stackexchange.py
 #
-
 import re
 from typing import List, Tuple
 
+import click
 import pyspark.sql.functions as F
 import pyspark.sql.types as T
 from pyspark.sql import DataFrame, SparkSession
@@ -48,10 +48,9 @@ def split_tags(tags: str) -> List[str]:
 #
 
 spark: SparkSession = SparkSession.builder.appName("Stack Exchange Graph Builder").getOrCreate()
-sc = spark.sparkContext
-sc.setCheckpointDir("/tmp/graphframes-checkpoints")
+spark.sparkContext.setCheckpointDir("/tmp/graphframes-checkpoints/stackexchange")
 
-print("Loading data for stats.meta.stackexchange.com ...")
+click.echo("Loading data for stats.meta.stackexchange.com ...")
 
 
 #
@@ -63,7 +62,7 @@ def split_tags(tags: str) -> List[str]:
     .options(rootTag="posts")
     .load(f"{BASE_PATH}/Posts.xml")
 )
-print(f"\nTotal Posts:       {posts_df.count():,}")
+click.echo(f"\nTotal Posts:       {posts_df.count():,}")
 
 # Remove the _ prefix from field names
 posts_df = remove_prefix(posts_df)
@@ -85,14 +84,14 @@ def split_tags(tags: str) -> List[str]:
 # Do the questions look ok? Questions have NO parent ID and DO have a Title
 questions_df: DataFrame = posts_df.filter(posts_df.ParentId.isNull())
 questions_df = questions_df.withColumn("Type", F.lit("Question")).cache()
-print(f"\nTotal questions: {questions_df.count():,}\n")
+click.echo(f"\nTotal questions: {questions_df.count():,}\n")
 
 questions_df.select("ParentId", "Title", "Body").show(10)
 
 # Answers DO have a ParentId parent post and no Title
 answers_df: DataFrame = posts_df.filter(posts_df.ParentId.isNotNull())
 answers_df = answers_df.withColumn("Type", F.lit("Answer")).cache()
-print(f"\nTotal answers: {answers_df.count():,}\n")
+click.echo(f"\nTotal answers: {answers_df.count():,}\n")
 
 answers_df.select("ParentId", "Title", "Body").show(10)
 
@@ -107,7 +106,7 @@ def split_tags(tags: str) -> List[str]:
     .options(rootTag="postlinks")
     .load(f"{BASE_PATH}/PostLinks.xml")
 )
-print(f"Total PostLinks:   {post_links_df.count():,}")
+click.echo(f"Total PostLinks:   {post_links_df.count():,}")
 
 # Remove the _ prefix from field names
 post_links_df = (
@@ -132,7 +131,7 @@ def split_tags(tags: str) -> List[str]:
     .options(rootTag="posthistory")
     .load(f"{BASE_PATH}/PostHistory.xml")
 )
-print(f"Total PostHistory: {post_history_df.count():,}")
+click.echo(f"Total PostHistory: {post_history_df.count():,}")
 
 # Remove the _ prefix from field names
 post_history_df = remove_prefix(post_history_df).withColumn("Type", F.lit("PostHistory"))
@@ -148,7 +147,7 @@ def split_tags(tags: str) -> List[str]:
     .options(rootTag="comments")
     .load(f"{BASE_PATH}/Comments.xml")
 )
-print(f"Total Comments:    {comments_df.count():,}")
+click.echo(f"Total Comments:    {comments_df.count():,}")
 
 # Remove the _ prefix from field names
 comments_df = remove_prefix(comments_df).withColumn("Type", F.lit("Comment"))
@@ -164,7 +163,7 @@ def split_tags(tags: str) -> List[str]:
     .options(rootTag="users")
     .load(f"{BASE_PATH}/Users.xml")
 )
-print(f"Total Users:       {users_df.count():,}")
+click.echo(f"Total Users:       {users_df.count():,}")
 
 # Remove the _ prefix from field names
 users_df = remove_prefix(users_df).withColumn("Type", F.lit("User"))
@@ -180,7 +179,7 @@ def split_tags(tags: str) -> List[str]:
     .options(rootTag="votes")
     .load(f"{BASE_PATH}/Votes.xml")
 )
-print(f"Total Votes:       {votes_df.count():,}")
+click.echo(f"Total Votes:       {votes_df.count():,}")
 
 # Remove the _ prefix from field names
 votes_df = remove_prefix(votes_df).withColumn("Type", F.lit("Vote"))
@@ -213,7 +212,7 @@ def split_tags(tags: str) -> List[str]:
     .options(rootTag="tags")
     .load(f"{BASE_PATH}/Tags.xml")
 )
-print(f"Total Tags:        {tags_df.count():,}")
+click.echo(f"Total Tags:        {tags_df.count():,}")
 
 # Remove the _ prefix from field names
 tags_df = remove_prefix(tags_df).withColumn("Type", F.lit("Tag"))
@@ -229,7 +228,7 @@ def split_tags(tags: str) -> List[str]:
     .options(rootTag="badges")
     .load(f"{BASE_PATH}/Badges.xml")
 )
-print(f"Total Badges:      {badges_df.count():,}\n")
+click.echo(f"Total Badges:      {badges_df.count():,}\n")
 
 # Remove the _ prefix from field names
 badges_df = remove_prefix(badges_df).withColumn("Type", F.lit("Badge"))
@@ -292,7 +291,7 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     .unionByName(badges_df)
     .distinct()
 )
-print(f"Total distinct nodes: {nodes_df.count():,}")
+click.echo(f"Total distinct nodes: {nodes_df.count():,}")
 
 # Now add a unique lowercase 'id' field - standard for GraphFrames - moving the original...
 # Stack Exchange Id to StackId
@@ -384,8 +383,8 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     # All edges have a 'relationship' field
     F.lit("CastFor").alias("relationship"),
 )
-print(f"Total CastFor edges: {cast_for_edge_df.count():,}")
-print(f"Percentage of linked votes: {cast_for_edge_df.count() / votes_df.count():.2%}\n")
+click.echo(f"Total CastFor edges: {cast_for_edge_df.count():,}")
+click.echo(f"Percentage of linked votes: {cast_for_edge_df.count() / votes_df.count():.2%}\n")
 
 
 #
@@ -407,8 +406,8 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     # All edges have a 'relationship' field
     "relationship",
 )
-print(f"Total Asks edges: {user_asks_edges_df.count():,}")
-print(
+click.echo(f"Total Asks edges: {user_asks_edges_df.count():,}")
+click.echo(
     f"Percentage of asked questions linked to users: {user_asks_edges_df.count() / questions_df.count():.2%}\n"
 )
 
@@ -432,8 +431,8 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     # All edges have a 'relationship' field
     "relationship",
 )
-print(f"Total User Answers edges: {user_answers_edges_df.count():,}")
-print(
+click.echo(f"Total User Answers edges: {user_answers_edges_df.count():,}")
+click.echo(
     f"Percentage of answers linked to users: {user_answers_edges_df.count() / answers_df.count():.2%}\n"
 )
 
@@ -457,8 +456,8 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     # All edges have a 'relationship' field
     F.lit("Answers").alias("relationship"),
 )
-print(f"Total Posts Answers edges: {question_answers_edges_df.count():,}")
-print(
+click.echo(f"Total Posts Answers edges: {question_answers_edges_df.count():,}")
+click.echo(
     f"Percentage of linked answers: {question_answers_edges_df.count() / answers_df.count():.2%}\n"
 )
 
@@ -482,8 +481,8 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     # All edges have a 'relationship' field
     F.lit("Tags").alias("relationship"),
 )
-print(f"Total Tags edges: {tags_edge_df.count():,}")
-print(f"Percentage of linked tags: {tags_edge_df.count() / posts_df.count():.2%}\n")
+click.echo(f"Total Tags edges: {tags_edge_df.count():,}")
+click.echo(f"Percentage of linked tags: {tags_edge_df.count() / posts_df.count():.2%}\n")
 
 
 #
@@ -505,8 +504,8 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     # All edges have a 'relationship' field
     "relationship",
 )
-print(f"Total Earns edges: {earns_edges_df.count():,}")
-print(f"Percentage of earned badges: {earns_edges_df.count() / badges_df.count():.2%}\n")
+click.echo(f"Total Earns edges: {earns_edges_df.count():,}")
+click.echo(f"Percentage of earned badges: {earns_edges_df.count() / badges_df.count():.2%}\n")
 
 
 #
@@ -543,16 +542,16 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     .withColumn("relationship", F.lit("Duplicates"))
     .select("src", "dst", "relationship")
 )
-print(f"Total Duplicates edges: {duplicates_edge_df.count():,}")
-print(f"Percentage of duplicate posts: {duplicates_edge_df.count() / post_links_df.count():.2%}\n")
+click.echo(f"Total Duplicates edges: {duplicates_edge_df.count():,}")
+click.echo(f"Percentage of duplicate posts: {duplicates_edge_df.count() / post_links_df.count():.2%}\n")
 
 linked_edge_df = (
     raw_links_edge_df.filter(F.col("LinkType") == "Linked")
     .withColumn("relationship", F.lit("Links"))
     .select("src", "dst", "relationship")
 )
-print(f"Total Links edges: {linked_edge_df.count():,}")
-print(f"Percentage of linked posts: {linked_edge_df.count() / post_links_df.count():.2%}\n")
+click.echo(f"Total Links edges: {linked_edge_df.count():,}")
+click.echo(f"Percentage of linked posts: {linked_edge_df.count() / post_links_df.count():.2%}\n")
 
 
 #
@@ -592,4 +591,4 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
 relationships_df.write.mode("overwrite").parquet(EDGES_PATH)
 
 spark.stop()
-print("Spark stopped.")
+click.echo("Spark stopped.")

From b97063677aca43d918ad775469a58cff39eefb3a Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Fri, 21 Feb 2025 11:49:44 +0100
Subject: [PATCH 52/53] Using 'from __future__ import annotations' instead of
 List and Tuple

---
 python/graphframes/tutorials/stackexchange.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/python/graphframes/tutorials/stackexchange.py b/python/graphframes/tutorials/stackexchange.py
index 72185c446..02ebb2bb5 100644
--- a/python/graphframes/tutorials/stackexchange.py
+++ b/python/graphframes/tutorials/stackexchange.py
@@ -5,8 +5,9 @@
 #
 # Batch Usage: spark-submit --packages com.databricks:spark-xml_2.12:0.18.0 python/graphframes/tutorials/stackexchange.py
 #
+from __future__ import annotations
+
 import re
-from typing import List, Tuple
 
 import click
 import pyspark.sql.functions as F
@@ -36,7 +37,7 @@ def remove_prefix(df: DataFrame) -> DataFrame:
 
 
 @F.udf(returnType=T.ArrayType(T.StringType()))
-def split_tags(tags: str) -> List[str]:
+def split_tags(tags: str) -> list[str]:
     if not tags:
         return []
     # Remove < and > and split into array
@@ -238,7 +239,7 @@ def split_tags(tags: str) -> List[str]:
 # Form the nodes from the UNION of posts, users, votes and their combined schemas
 #
 
-all_cols: List[Tuple[str, T.StructField]] = list(
+all_cols: list[tuple[str, T.StructField]] = list(
     set(
         list(zip(answers_df.columns, answers_df.schema))
         + list(zip(questions_df.columns, questions_df.schema))
@@ -250,10 +251,10 @@ def split_tags(tags: str) -> List[str]:
         + list(zip(badges_df.columns, badges_df.schema))
     )
 )
-all_column_names: List[str] = sorted([x[0] for x in all_cols])
+all_column_names: list[str] = sorted([x[0] for x in all_cols])
 
 
-def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]) -> DataFrame:
+def add_missing_columns(df: DataFrame, all_cols: list[tuple[str, T.StructField]]) -> DataFrame:
     """Add any missing columns from any DataFrame among several we want to merge."""
     for col_name, schema_field in all_cols:
         if col_name not in df.columns:
@@ -543,7 +544,9 @@ def add_missing_columns(df: DataFrame, all_cols: List[Tuple[str, T.StructField]]
     .select("src", "dst", "relationship")
 )
 click.echo(f"Total Duplicates edges: {duplicates_edge_df.count():,}")
-click.echo(f"Percentage of duplicate posts: {duplicates_edge_df.count() / post_links_df.count():.2%}\n")
+click.echo(
+    f"Percentage of duplicate posts: {duplicates_edge_df.count() / post_links_df.count():.2%}\n"
+)
 
 linked_edge_df = (
     raw_links_edge_df.filter(F.col("LinkType") == "Linked")

From 378894125e6baff2e0a6deab0635224e05f3ad26 Mon Sep 17 00:00:00 2001
From: Russell Jurney <russell.jurney@gmail.com>
Date: Fri, 21 Feb 2025 12:11:18 +0100
Subject: [PATCH 53/53] Now try up to three times if we can't connect for any
 reason in 'graphframes stackexchange' command.

---
 python/graphframes/tutorials/download.py | 25 ++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/python/graphframes/tutorials/download.py b/python/graphframes/tutorials/download.py
index 049b1fa15..4eadfa647 100755
--- a/python/graphframes/tutorials/download.py
+++ b/python/graphframes/tutorials/download.py
@@ -36,13 +36,30 @@ def stackexchange(subdomain: str, data_dir: str, extract: bool) -> None:
     click.echo(f"Downloading archive from {archive_url}")
 
     try:
-        # Download the file
-        response = requests.get(archive_url, stream=True)
-        response.raise_for_status()  # Raise exception for bad status codes
+        # Download the file with retries
+        max_retries = 3
+        retry_count = 0
+
+        while retry_count < max_retries:
+            try:
+                response = requests.get(archive_url, stream=True)
+                response.raise_for_status()  # Raise exception for bad status codes
+                break
+            except (
+                requests.exceptions.RequestException,
+                requests.exceptions.ConnectionError,
+                requests.exceptions.HTTPError,
+                requests.exceptions.Timeout,
+            ) as e:
+                retry_count += 1
+                if retry_count == max_retries:
+                    click.echo(f"Failed to download after {max_retries} attempts: {e}", err=True)
+                    raise click.Abort()
+                click.echo(f"Download attempt {retry_count} failed, retrying...")
 
         total_size = int(response.headers.get("content-length", 0))
 
-        with click.progressbar(length=total_size, label="Downloading") as bar:
+        with click.progressbar(length=total_size, label="Downloading") as bar:  # type: ignore
             with open(archive_path, "wb") as f:
                 for chunk in response.iter_content(chunk_size=8192):
                     if chunk: