Use drop_null_keys in group_by calls; add some pandas fast paths

MarcoGorelli · MarcoGorelli · commit e6e9994108e5 · 2024-10-27T15:51:53.000Z
diff --git a/packages/python/plotly/_plotly_utils/basevalidators.py b/packages/python/plotly/_plotly_utils/basevalidators.py
@@ -8,6 +8,7 @@
 import re
 import sys
 import warnings
+import narwhals.stable.v1 as nw
 
 from _plotly_utils.optional_imports import get_module
 
@@ -93,8 +94,19 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
         "O": "object",
     }
 
-    # Handle pandas Series and Index objects
+    if isinstance(v, nw.Series):
+        if nw.dependencies.is_pandas_like_series(v_native := v.to_native()):
+            v = v_native
+        else:
+            v = v.to_numpy()
+    elif isinstance(v, nw.DataFrame):
+        if nw.dependencies.is_pandas_like_dataframe(v_native := v.to_native()):
+            v = v_native
+        else:
+            v = v.to_numpy()
+
     if pd and isinstance(v, (pd.Series, pd.Index)):
+        # Handle pandas Series and Index objects
         if v.dtype.kind in numeric_kinds:
             # Get the numeric numpy array so we use fast path below
             v = v.values
@@ -189,10 +201,12 @@ def is_homogeneous_array(v):
     """
     np = get_module("numpy", should_load=False)
     pd = get_module("pandas", should_load=False)
+    import narwhals as nw
     if (
         np
         and isinstance(v, np.ndarray)
         or (pd and isinstance(v, (pd.Series, pd.Index)))
+        or (isinstance(v, nw.Series))
     ):
         return True
     if is_numpy_convertable(v):
diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py
@@ -156,6 +156,8 @@ def invert_label(args, column):
 
 
 def _is_continuous(df: nw.DataFrame, col_name: str) -> bool:
+    if nw.dependencies.is_pandas_like_dataframe(df_native := df.to_native()):
+        return df_native[col_name].dtype.kind in 'ifc'  # fast path: signed int/float/complex
     return df.get_column(col_name).dtype.is_numeric()
 
 
@@ -1114,15 +1116,12 @@ def to_unindexed_series(x, name=None, native_namespace=None):
     itx index reset if pandas-like). Stripping the index from existing pd.Series is
     required to get things to match up right in the new DataFrame we're building.
     """
-    x_native = nw.to_native(x, strict=False)
-    if nw.dependencies.is_pandas_like_series(x_native):
-        return nw.from_native(
-            x_native.__class__(x_native, name=name).reset_index(drop=True),
-            series_only=True,
-        )
     x = nw.from_native(x, series_only=True, strict=False)
     if isinstance(x, nw.Series):
-        return x.rename(name)
+        if name == x.name:
+            # Avoid potentially creating a copy in pre-copy-on-write pandas
+            return nw.maybe_reset_index(x)
+        return nw.maybe_reset_index(x).rename(name)
     elif native_namespace is not None:
         return nw.new_series(name=name, values=x, native_namespace=native_namespace)
     else:
@@ -1907,7 +1906,7 @@ def post_agg(dframe: nw.LazyFrame, continuous_aggs, discrete_aggs) -> nw.LazyFra
     for i, level in enumerate(path):
 
         dfg = (
-            df.group_by(path[i:])
+            df.group_by(path[i:], drop_null_keys=True)
             .agg(**agg_f)
             .pipe(post_agg, continuous_aggs, discrete_aggs)
         )
@@ -2307,7 +2306,7 @@ def get_groups_and_orders(args, grouper):
         groups = {tuple(single_group_name): df}
     else:
         required_grouper = list(orders.keys())
-        grouped = dict(df.group_by(required_grouper).__iter__())
+        grouped = dict(df.group_by(required_grouper, drop_null_keys=True).__iter__())
         sorted_group_names = list(grouped.keys())
 
         for i, col in reversed(list(enumerate(required_grouper))):
diff --git a/packages/python/plotly/plotly/express/_imshow.py b/packages/python/plotly/plotly/express/_imshow.py
@@ -326,7 +326,6 @@ def imshow(
         if binary_string:
             raise ValueError("Binary strings cannot be used with pandas arrays")
         is_dataframe = True
-        img = img.to_numpy()
     else:
         is_dataframe = False
 
diff --git a/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py b/packages/python/plotly/plotly/figure_factory/_hexbin_mapbox.py
@@ -407,7 +407,7 @@ def create_hexbin_mapbox(
         center = dict(lat=lat_range.mean(), lon=lon_range.mean())
 
     if args["animation_frame"] is not None:
-        groups = dict(args["data_frame"].group_by(args["animation_frame"]).__iter__())
+        groups = dict(args["data_frame"].group_by(args["animation_frame"], drop_null_keys=True).__iter__())
     else:
         groups = {(0,): args["data_frame"]}