feat(bigframes): Support unstable sort_values, sort_index#16665
feat(bigframes): Support unstable sort_values, sort_index#16665TrevorBergeron wants to merge 1 commit intomainfrom
Conversation
There was a problem hiding this comment.
Code Review
This pull request implements stable sorting support by adding a kind parameter to sort_values and sort_index methods and updating the underlying core logic to handle sorting stability. The review feedback recommends centralizing the list of stable sort algorithms into a constant for better maintainability and suggests several refinements to type hints and default values in method overloads. Furthermore, the reviewer identified an opportunity to propagate the kind parameter when sorting DataFrames along the column axis to ensure consistent behavior.
| ] | ||
| VALID_WRITE_ENGINES = typing.get_args(WriteEngineType) | ||
|
|
||
| DEFAULT_SORT_KIND = "stable" |
| for column in index_columns | ||
| ] | ||
| return Index(self._block.order_by(ordering)) | ||
| is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"] |
| *, | ||
| ascending: bool = ..., | ||
| inplace: Literal[False] = ..., | ||
| kind: str = ..., |
| *, | ||
| ascending: bool = ..., | ||
| inplace: Literal[True] = ..., | ||
| kind: str = ..., |
| is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"] | ||
| block = self._block.order_by(ordering, stable=is_stable) |
There was a problem hiding this comment.
The kind parameter is correctly used for axis=0, but it should also be propagated to self.columns.sort_values when axis=1 (the else block) to ensure consistent sorting behavior for columns. Also, consider using the STABLE_SORT_KINDS constant.
| is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"] | |
| block = self._block.order_by(ordering, stable=is_stable) | |
| is_stable = (kind or constants.DEFAULT_SORT_KIND) in constants.STABLE_SORT_KINDS | |
| block = self._block.order_by(ordering, stable=is_stable) |
| inplace: Literal[False] = ..., | ||
| ascending: bool | typing.Sequence[bool] = ..., | ||
| kind: str = ..., | ||
| kind: str | None = None, |
| inplace: Literal[True] = ..., | ||
| ascending: bool | typing.Sequence[bool] = ..., | ||
| kind: str = ..., | ||
| kind: str | None = None, |
| else order.descending_over(column_id, na_last) | ||
| ) | ||
| block = self._block.order_by(ordering) | ||
| is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"] |
| raise ValueError(f"No axis named {axis} for object type Series") | ||
| if na_position not in ["first", "last"]: | ||
| raise ValueError("Param na_position must be one of 'first' or 'last'") | ||
| is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"] |
| for column in block.index_columns | ||
| ] | ||
| block = block.order_by(ordering) | ||
| is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"] |
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
Fixes #<issue_number_goes_here> 🦕