diff --git a/doc/release/upcoming_changes/31345.new_feature.rst b/doc/release/upcoming_changes/31345.new_feature.rst new file mode 100644 index 000000000000..6b0320cff003 --- /dev/null +++ b/doc/release/upcoming_changes/31345.new_feature.rst @@ -0,0 +1,7 @@ +New ``descending`` keyword argument for ``numpy.sort`` and ``numpy.argsort`` +---------------------------------------------------------------------------- +Users can now pass the ``descending=True`` keyword argument to ``numpy.sort`` and ``numpy.argsort`` +to sort and argsort arrays in descending order. NaN values, if present, are sorted to the end +of the array in both ascending and descending sorts. This feature is available for all built-in +dtypes except ``void``, ``object``, and ``generic``. Note that SIMD optimizations for sorting are +currently not available for descending sorts, so performance may be slower. diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 4875e64dccc1..0df2d78fe838 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -1733,6 +1733,7 @@ class _ArrayOrScalarCommon: order: str | Sequence[str] | None = ..., *, stable: py_bool | None = ..., + descending: py_bool | None = ..., ) -> NDArray[intp]: ... @overload # axis=None (default), out=None (default), keepdims=False (default) @@ -3741,6 +3742,7 @@ class ndarray(_ArrayOrScalarCommon, Generic[_ShapeT_co, _DTypeT_co]): order: str | Sequence[str] | None = None, *, stable: py_bool | None = None, + descending: py_bool | None = None, ) -> None: ... # Keep in sync with `MaskedArray.trace` @@ -5068,7 +5070,7 @@ class generic(_ArrayOrScalarCommon, Generic[_ItemT_co]): ) -> Never: ... def diagonal(self: Never, /, offset: L[0] = 0, axis1: L[0] = 0, axis2: L[1] = 1) -> Never: ... # type: ignore[misc] def swapaxes(self: Never, axis1: Never, axis2: Never, /) -> Never: ... # type: ignore[misc] - def sort(self: Never, /, axis: L[-1] = -1, kind: None = None, order: None = None, *, stable: None = None) -> Never: ... 
# type: ignore[misc] + def sort(self: Never, /, axis: L[-1] = -1, kind: None = None, order: None = None, *, stable: None = None, descending: None = None) -> Never: ... # type: ignore[misc] def nonzero(self: Never, /) -> Never: ... # type: ignore[misc] def setfield(self: Never, val: Never, /, dtype: Never, offset: L[0] = 0) -> None: ... # type: ignore[misc] def searchsorted(self: Never, v: Never, /, side: L["left"] = "left", sorter: None = None) -> Never: ... # type: ignore[misc] diff --git a/numpy/_core/_add_newdocs.py b/numpy/_core/_add_newdocs.py index 63ede7cc2c0b..91c8c0b3e85b 100644 --- a/numpy/_core/_add_newdocs.py +++ b/numpy/_core/_add_newdocs.py @@ -3488,9 +3488,9 @@ def _array_method_doc(name: str, params: str, doc: str) -> None: numpy.argmin : equivalent function """) -_array_method_doc('argsort', "axis=-1, kind=None, order=None, *, stable=None", +_array_method_doc('argsort', "axis=-1, kind=None, order=None, *, stable=None, descending=None", """ - a.argsort(axis=-1, kind=None, order=None, *, stable=None) + a.argsort(axis=-1, kind=None, order=None, *, stable=None, descending=None) Returns the indices that would sort this array. @@ -4413,9 +4413,9 @@ def _array_method_doc(name: str, params: str, doc: str) -> None: ValueError: cannot set WRITEBACKIFCOPY flag to True """) -_array_method_doc('sort', "axis=-1, kind=None, order=None, *, stable=None", +_array_method_doc('sort', "axis=-1, kind=None, order=None, *, stable=None, descending=None", """ - a.sort(axis=-1, kind=None, order=None, *, stable=None) + a.sort(axis=-1, kind=None, order=None, *, stable=None, descending=None) Sort an array in-place. Refer to `numpy.sort` for full documentation. 
diff --git a/numpy/_core/defchararray.py b/numpy/_core/defchararray.py index 5883bb6be5f5..8d67766b6e97 100644 --- a/numpy/_core/defchararray.py +++ b/numpy/_core/defchararray.py @@ -722,7 +722,7 @@ def __mod__(self, i): def __rmod__(self, other): return NotImplemented - def argsort(self, axis=-1, kind=None, order=None, *, stable=None): + def argsort(self, axis=-1, kind=None, order=None, *, stable=None, descending=None): """ Return the indices that sort the array lexicographically. @@ -740,7 +740,10 @@ def argsort(self, axis=-1, kind=None, order=None, *, stable=None): dtype='|S5') """ - return self.__array__().argsort(axis, kind, order, stable=stable) + return self.__array__().argsort( + axis, kind, order, stable=stable, descending=descending + ) + argsort.__doc__ = ndarray.argsort.__doc__ def capitalize(self): diff --git a/numpy/_core/fromnumeric.py b/numpy/_core/fromnumeric.py index 2d99bdcad011..da40316cc3d4 100644 --- a/numpy/_core/fromnumeric.py +++ b/numpy/_core/fromnumeric.py @@ -932,12 +932,14 @@ def argpartition(a, kth, axis=-1, kind='introselect', order=None): return _wrapfunc(a, 'argpartition', kth, axis=axis, kind=kind, order=order) -def _sort_dispatcher(a, axis=None, kind=None, order=None, *, stable=None): +def _sort_dispatcher( + a, axis=None, kind=None, order=None, *, stable=None, descending=None +): return (a,) @array_function_dispatch(_sort_dispatcher) -def sort(a, axis=-1, kind=None, order=None, *, stable=None): +def sort(a, axis=-1, kind=None, order=None, *, stable=None, descending=None): """ Return a sorted copy of an array. @@ -966,6 +968,13 @@ def sort(a, axis=-1, kind=None, order=None, *, stable=None): this option selects ``kind='stable'``. Default: ``None``. .. versionadded:: 2.0.0 + descending : bool, optional + Sort order. If ``True``, the returned array will be sorted in + descending order. If ``False`` or ``None``, the returned array will + be sorted in ascending order. Values that are NaN are sorted to the + end for both orders. 
Default: ``None``. + + .. versionadded:: 2.5.0 Returns ------- @@ -1089,16 +1098,18 @@ def sort(a, axis=-1, kind=None, order=None, *, stable=None): axis = -1 else: a = asanyarray(a).copy(order="K") - a.sort(axis=axis, kind=kind, order=order, stable=stable) + a.sort(axis=axis, kind=kind, order=order, stable=stable, descending=descending) return a -def _argsort_dispatcher(a, axis=None, kind=None, order=None, *, stable=None): +def _argsort_dispatcher( + a, axis=None, kind=None, order=None, *, stable=None, descending=None +): return (a,) @array_function_dispatch(_argsort_dispatcher) -def argsort(a, axis=-1, kind=None, order=None, *, stable=None): +def argsort(a, axis=-1, kind=None, order=None, *, stable=None, descending=None): """ Returns the indices that would sort an array. @@ -1131,6 +1142,13 @@ def argsort(a, axis=-1, kind=None, order=None, *, stable=None): this option selects ``kind='stable'``. Default: ``None``. .. versionadded:: 2.0.0 + descending : bool, optional + Sort order. If ``True``, the returned array will be sorted in + descending order. If ``False`` or ``None``, the returned array will + be sorted in ascending order. Values that are NaN are sorted to the + end for both orders. Default: ``None``. + + .. versionadded:: 2.5.0 Returns ------- @@ -1211,7 +1229,13 @@ def argsort(a, axis=-1, kind=None, order=None, *, stable=None): """ return _wrapfunc( - a, 'argsort', axis=axis, kind=kind, order=order, stable=stable + a, + "argsort", + axis=axis, + kind=kind, + order=order, + stable=stable, + descending=descending, ) def _argmax_dispatcher(a, axis=None, out=None, *, keepdims=np._NoValue): diff --git a/numpy/_core/fromnumeric.pyi b/numpy/_core/fromnumeric.pyi index dbff4fcc8283..2de9b8c49c71 100644 --- a/numpy/_core/fromnumeric.pyi +++ b/numpy/_core/fromnumeric.pyi @@ -453,6 +453,7 @@ def sort[ArrayT: np.ndarray]( order: str | Sequence[str] | None = None, *, stable: bool | None = None, + descending: bool | None = None, ) -> ArrayT: ... 
@overload def sort[ScalarT: np.generic]( @@ -462,6 +463,7 @@ def sort[ScalarT: np.generic]( order: str | Sequence[str] | None = None, *, stable: bool | None = None, + descending: bool | None = None, ) -> NDArray[ScalarT]: ... @overload def sort[ScalarT: np.generic]( @@ -471,6 +473,7 @@ def sort[ScalarT: np.generic]( order: str | Sequence[str] | None = None, *, stable: bool | None = None, + descending: bool | None = None, ) -> _Array1D[ScalarT]: ... @overload def sort( @@ -480,6 +483,7 @@ def sort( order: str | Sequence[str] | None = None, *, stable: bool | None = None, + descending: bool | None = None, ) -> NDArray[Any]: ... @overload def sort( @@ -489,6 +493,7 @@ def sort( order: str | Sequence[str] | None = None, *, stable: bool | None = None, + descending: bool | None = None, ) -> _Array1D[Any]: ... # @@ -500,6 +505,7 @@ def argsort[ShapeT: _Shape]( order: str | Sequence[str] | None = None, *, stable: bool | None = None, + descending: bool | None = None, ) -> np.ndarray[ShapeT, np.dtype[np.intp]]: ... @overload def argsort( @@ -509,6 +515,7 @@ def argsort( order: str | Sequence[str] | None = None, *, stable: bool | None = None, + descending: bool | None = None, ) -> NDArray[np.intp]: ... @overload def argsort( @@ -518,6 +525,7 @@ def argsort( order: str | Sequence[str] | None = None, *, stable: bool | None = None, + descending: bool | None = None, ) -> _Array1D[np.intp]: ... 
# keep in sync with `argmin` below diff --git a/numpy/_core/meson.build b/numpy/_core/meson.build index c90346ab4d01..9067388319da 100644 --- a/numpy/_core/meson.build +++ b/numpy/_core/meson.build @@ -1230,11 +1230,11 @@ src_multiarray = multiarray_gen_headers + [ 'src/multiarray/temp_elide.c', 'src/multiarray/usertypes.c', 'src/multiarray/vdot.c', - 'src/npysort/quicksort.cpp', + 'src/npysort/quicksort_generic.cpp', 'src/npysort/mergesort.cpp', - 'src/npysort/timsort.cpp', + 'src/npysort/timsort_generic.cpp', 'src/npysort/heapsort.cpp', - 'src/npysort/radixsort.cpp', + 'src/npysort/npysort_methods.cpp', 'src/common/npy_partition.h', 'src/npysort/selection.cpp', 'src/common/npy_binsearch.h', diff --git a/numpy/_core/src/common/npy_sort.h.src b/numpy/_core/src/common/npy_sort.h.src index 95d6f9d1ee70..35bf7f93b0ef 100644 --- a/numpy/_core/src/common/npy_sort.h.src +++ b/numpy/_core/src/common/npy_sort.h.src @@ -24,72 +24,13 @@ extern "C" { #endif - -/* - ***************************************************************************** - ** NUMERIC SORTS ** - ***************************************************************************** - */ - - -/**begin repeat - * - * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong, half, float, double, longdouble, - * cfloat, cdouble, clongdouble, datetime, timedelta# - */ - -NPY_NO_EXPORT int quicksort_@suff@(void *vec, npy_intp cnt, void *null); -NPY_NO_EXPORT int heapsort_@suff@(void *vec, npy_intp cnt, void *null); -NPY_NO_EXPORT int mergesort_@suff@(void *vec, npy_intp cnt, void *null); -NPY_NO_EXPORT int timsort_@suff@(void *vec, npy_intp cnt, void *null); -NPY_NO_EXPORT int aquicksort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null); -NPY_NO_EXPORT int aheapsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null); -NPY_NO_EXPORT int amergesort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null); -NPY_NO_EXPORT int atimsort_@suff@(void *vec, npy_intp *ind, 
npy_intp cnt, void *null); - -/**end repeat**/ - -/**begin repeat - * - * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong# - */ -#ifdef __cplusplus -extern "C" { -#endif -NPY_NO_EXPORT int radixsort_@suff@(void *vec, npy_intp cnt, void *null); -NPY_NO_EXPORT int aradixsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null); -#ifdef __cplusplus -} -#endif - -/**end repeat**/ - - - /* ***************************************************************************** - ** STRING SORTS ** + ** NEW SORT METHOD REGISTRATIONS ** ***************************************************************************** */ - -/**begin repeat - * - * #suff = string, unicode# - */ - -NPY_NO_EXPORT int quicksort_@suff@(void *vec, npy_intp cnt, void *arr); -NPY_NO_EXPORT int heapsort_@suff@(void *vec, npy_intp cnt, void *arr); -NPY_NO_EXPORT int mergesort_@suff@(void *vec, npy_intp cnt, void *arr); -NPY_NO_EXPORT int timsort_@suff@(void *vec, npy_intp cnt, void *arr); -NPY_NO_EXPORT int aquicksort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *arr); -NPY_NO_EXPORT int aheapsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *arr); -NPY_NO_EXPORT int amergesort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *arr); -NPY_NO_EXPORT int atimsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *arr); - -/**end repeat**/ +NPY_NO_EXPORT int register_all_sorts(void); /* diff --git a/numpy/_core/src/common/numpy_tag.h b/numpy/_core/src/common/numpy_tag.h index 8ff4b1069ed5..e182b66dfa78 100644 --- a/numpy/_core/src/common/numpy_tag.h +++ b/numpy/_core/src/common/numpy_tag.h @@ -16,8 +16,9 @@ * * - ``type`` -- the underlying C scalar type * - ``type_value`` -- the corresponding ``NPY_TYPES`` enumerator - * - ``less`` / ``less_equal`` -- the sort-friendly comparisons that - * propagate NaN / NaT to the high end + * - ``less`` / ``less_equal`` / ``greater`` -- the sort-friendly + * comparisons that order NaN / NaT to the end (as if largest 
value + * for less and as if smallest for greater). * * For the four numeric categories that need different NaN/NaT handling, * comparisons are implemented once at the ``*_type`` @@ -49,8 +50,9 @@ template struct integral_type : integral_tag { using type = T; static constexpr NPY_TYPES type_value = TypeNum; - static int less(T a, T b) { return a < b; } - static int less_equal(T a, T b) { return !(b < a); } + static constexpr int less(T a, T b) { return a < b; } + static constexpr int less_equal(T a, T b) { return !(b < a); } + static constexpr int greater(T a, T b) { return a > b; } }; template @@ -59,8 +61,11 @@ struct floating_point_type : floating_point_tag { static constexpr NPY_TYPES type_value = TypeNum; // NaN sorts to the end: a is "less than" b if a is non-NaN and // either a < b or b is NaN. ``x != x`` is the IEEE NaN test. - static int less(T a, T b) { return a < b || (b != b && a == a); } - static int less_equal(T a, T b) { return !less(b, a); } + static constexpr int less(T a, T b) { return a < b || (b != b && a == a); } + static constexpr int less_equal(T a, T b) { return !less(b, a); } + // NaN sorts to the end in reverse too: ``a`` is "greater than" ``b`` + // if ``a`` is non-NaN and either ``b < a`` or ``b`` is NaN. + static constexpr int greater(T a, T b) { return a > b || (b != b && a == a); } }; // Half is its own per-type tag; no template since there is only one half @@ -69,13 +74,13 @@ struct half_tag { using type = npy_half; static constexpr NPY_TYPES type_value = NPY_HALF; - static int isnan(npy_half h) + static constexpr int isnan(npy_half h) { return ((h & 0x7c00u) == 0x7c00u) && ((h & 0x03ffu) != 0x0000u); } // Bit-level less-than that assumes neither operand is NaN. 
- static int lt_nonan(npy_half a, npy_half b) + static constexpr int lt_nonan(npy_half a, npy_half b) { if (a & 0x8000u) { if (b & 0x8000u) { @@ -90,14 +95,23 @@ struct half_tag { return (a & 0x7fffu) < (b & 0x7fffu); } - static int less(npy_half a, npy_half b) + static constexpr int less(npy_half a, npy_half b) { if (isnan(b)) { return !isnan(a); } return !isnan(a) && lt_nonan(a, b); } - static int less_equal(npy_half a, npy_half b) { return !less(b, a); } + static constexpr int less_equal(npy_half a, npy_half b) { return !less(b, a); } + + // NaN sorts to the end in reverse too. + static constexpr int greater(npy_half a, npy_half b) + { + if (isnan(b)) { + return !isnan(a); + } + return !isnan(a) && lt_nonan(b, a); + } }; template @@ -105,22 +119,13 @@ struct complex_type : complex_tag { using type = T; static constexpr NPY_TYPES type_value = TypeNum; - // Real / imag accessors picked at compile time so ``less`` can be - // written generically across the three complex scalar types. - static auto creal(T z) - { - if constexpr (std::is_same_v) return npy_crealf(z); - else if constexpr (std::is_same_v) return npy_creal(z); - else return npy_creall(z); - } - static auto cimag(T z) - { - if constexpr (std::is_same_v) return npy_cimagf(z); - else if constexpr (std::is_same_v) return npy_cimag(z); - else return npy_cimagl(z); - } + // In C++ mode the npy_c{float,double,longdouble} types are plain + // structs with a ``_Val[2]`` member (see numpy/npy_common.h), access + // directly so this can be a `constexpr` easily. 
+ static constexpr auto creal(T z) { return z._Val[0]; } + static constexpr auto cimag(T z) { return z._Val[1]; } - static int less(T a, T b) + static constexpr int less(T a, T b) { const auto ra = creal(a), rb = creal(b); const auto ia = cimag(a), ib = cimag(b); @@ -135,20 +140,44 @@ struct complex_type : complex_tag { } return rb != rb; } - static int less_equal(T a, T b) { return !less(b, a); } + static constexpr int less_equal(T a, T b) { return !less(b, a); } + + static constexpr int greater(T a, T b) + { + const auto ra = creal(a), rb = creal(b); + const auto ia = cimag(a), ib = cimag(b); + if (ra > rb || (ra == ra && rb != rb)) { + return ia == ia || ib != ib; + } + if (ra < rb || (ra != ra && rb == rb)) { + return ib != ib && ia == ia; + } + if (ra == rb || (ra != ra && rb != rb)) { + return ia > ib || (ib != ib && ia == ia); + } + return ra != ra; + } }; template struct datetime_type : date_tag { using type = T; static constexpr NPY_TYPES type_value = TypeNum; - static int less(T a, T b) + static constexpr int less(T a, T b) { if (a == NPY_DATETIME_NAT) return 0; if (b == NPY_DATETIME_NAT) return 1; return a < b; } - static int less_equal(T a, T b) { return !less(b, a); } + static constexpr int less_equal(T a, T b) { return !less(b, a); } + + // NaT sorts to the end in reverse too. + static constexpr int greater(T a, T b) + { + if (a == NPY_DATETIME_NAT) return 0; + if (b == NPY_DATETIME_NAT) return 1; + return b < a; + } }; // String / unicode tags work on runtime-length blocks. Comparison is @@ -187,6 +216,11 @@ struct string_like_type { { std::memcpy(a, b, n * sizeof(T)); } + + static int greater(T const *a, T const *b, size_t n) + { + return less(b, a, n); + } }; // Concrete tags consumed by callers. @@ -219,6 +253,18 @@ struct taglist { static constexpr unsigned size = sizeof...(Tags); }; +// Generic comparator dispatch used by the ascending/descending sort. +template +constexpr int cmp(Args... 
args) +{ + if constexpr (reverse) { + return Tag::greater(args...); + } + else { + return Tag::less(args...); + } +} + } // namespace npy #endif // NUMPY_CORE_SRC_COMMON_NUMPY_TAG_H_ diff --git a/numpy/_core/src/multiarray/arraytypes.c.src b/numpy/_core/src/multiarray/arraytypes.c.src index 54cd65e5ebcc..5680b3e22400 100644 --- a/numpy/_core/src/multiarray/arraytypes.c.src +++ b/numpy/_core/src/multiarray/arraytypes.c.src @@ -3997,7 +3997,6 @@ _create_datetime_metadata(NPY_DATETIMEUNIT base, int num) * * #from = VOID, STRING, UNICODE# * #suff = void, string, unicode# - * #sort = 0, 1, 1# * #align = char, char, npy_ucs4# * #NAME = Void, String, Unicode# * #endian = |, |, =# @@ -4039,25 +4038,12 @@ static PyArray_ArrFuncs _Py@NAME@_ArrFuncs = { (PyArray_NonzeroFunc*)@from@_nonzero, (PyArray_FillFunc*)NULL, (PyArray_FillWithScalarFunc*)NULL, -#if @sort@ - { - quicksort_@suff@, - heapsort_@suff@, - timsort_@suff@ - }, - { - aquicksort_@suff@, - aheapsort_@suff@, - atimsort_@suff@ - }, -#else { NULL, NULL, NULL }, { NULL, NULL, NULL }, -#endif NULL, (PyArray_ScalarKindFunc*)NULL, NULL, @@ -4098,14 +4084,12 @@ static _PyArray_LegacyDescr @from@_Descr = { * half, float, double, longdouble, * cfloat, cdouble, clongdouble, * object, datetime, timedelta# - * #sort = 1*18, 0*1, 1*2# * #fromtype = npy_bool, * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, * npy_long, npy_ulong, npy_longlong, npy_ulonglong, * npy_half, npy_float, npy_double, npy_longdouble, * npy_cfloat, npy_cdouble, npy_clongdouble, * PyObject *, npy_datetime, npy_timedelta# - * #rsort = 1*5, 0*16# * #NAME = Bool, * Byte, UByte, Short, UShort, Int, UInt, * Long, ULong, LongLong, ULongLong, @@ -4158,33 +4142,12 @@ static PyArray_ArrFuncs _Py@NAME@_ArrFuncs = { (PyArray_NonzeroFunc*)@from@_nonzero, (PyArray_FillFunc*)@from@_fill, (PyArray_FillWithScalarFunc*)@from@_fillwithscalar, -#if @sort@ - { - quicksort_@suff@, - heapsort_@suff@, - #if @rsort@ - radixsort_@suff@ - #else - timsort_@suff@ - 
#endif - }, - { - aquicksort_@suff@, - aheapsort_@suff@, - #if @rsort@ - aradixsort_@suff@ - #else - atimsort_@suff@ - #endif - }, -#else { NULL, NULL, NULL }, { NULL, NULL, NULL }, -#endif NULL, (PyArray_ScalarKindFunc*)NULL, NULL, @@ -4509,7 +4472,6 @@ static int } /**end repeat**/ - /* ***************************************************************************** ** SETUP TYPE INFO ** @@ -4600,6 +4562,13 @@ set_typeinfo(PyObject *dict) initialize_legacy_dtypemeta_aliases(_builtin_descrs); + /* + * Add sorting array methods for the new types. + */ + if (register_all_sorts() < 0) { + return -1; + } + /* * Add cast functions for the new types */ diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index 454a434304bb..546bc34c3397 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -3151,7 +3151,7 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND flags) { PyArrayMethodObject *sort_method = NULL; PyArrayMethod_StridedLoop *strided_loop = NULL; - PyArrayMethod_SortParameters sort_params = {.flags = flags}; + PyArrayMethod_SortParameters sort_params; PyArrayMethod_Context context = {0}; PyArray_Descr *loop_descrs[2]; NpyAuxData *auxdata = NULL; @@ -3172,6 +3172,7 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND flags) // Zero the NPY_HEAPSORT bit, maps NPY_HEAPSORT to NPY_QUICKSORT flags &= ~_NPY_SORT_HEAPSORT; + sort_params = (PyArrayMethod_SortParameters){.flags = flags}; // Look for type specific functions sort_method = NPY_DT_SLOTS(NPY_DTYPE(PyArray_DESCR(op)))->sort_meth; @@ -3269,7 +3270,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND flags) PyObject *ret; PyArrayMethodObject *argsort_method = NULL; PyArrayMethod_StridedLoop *strided_loop = NULL; - PyArrayMethod_SortParameters sort_params = {.flags = flags}; + PyArrayMethod_SortParameters sort_params; PyArrayMethod_Context context = {0}; PyArray_Descr *loop_descrs[2]; 
NpyAuxData *auxdata = NULL; @@ -3280,6 +3281,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND flags) // Zero the NPY_HEAPSORT bit, maps NPY_HEAPSORT to NPY_QUICKSORT flags &= ~_NPY_SORT_HEAPSORT; + sort_params = (PyArrayMethod_SortParameters){.flags = flags}; // Look for type specific functions argsort_method = NPY_DT_SLOTS(NPY_DTYPE(PyArray_DESCR(op)))->argsort_meth; diff --git a/numpy/_core/src/multiarray/methods.c b/numpy/_core/src/multiarray/methods.c index ce21261648c5..b154d3c96d79 100644 --- a/numpy/_core/src/multiarray/methods.c +++ b/numpy/_core/src/multiarray/methods.c @@ -1267,8 +1267,8 @@ array_sort(PyArrayObject *self, {"|axis", &PyArray_PythonPyIntFromInt, &axis}, {"|kind", &PyArray_SortkindConverter, &sortkind}, {"|order", NULL, &order}, - {"$stable", &PyArray_OptionalBoolConverter, &stable} - // {"$descending", &PyArray_OptionalBoolConverter, &descending} + {"$stable", &PyArray_OptionalBoolConverter, &stable}, + {"$descending", &PyArray_OptionalBoolConverter, &descending} ) < 0) { return NULL; } @@ -1428,9 +1428,8 @@ array_argsort(PyArrayObject *self, {"|axis", &PyArray_AxisConverter, &axis}, {"|kind", &PyArray_SortkindConverter, &sortkind}, {"|order", NULL, &order}, - {"$stable", &PyArray_OptionalBoolConverter, &stable} - // TODO: add descending sorts, gh-14728 - // {"$descending", &PyArray_OptionalBoolConverter, &descending} + {"$stable", &PyArray_OptionalBoolConverter, &stable}, + {"$descending", &PyArray_OptionalBoolConverter, &descending} ) < 0) { return NULL; } diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c index eb97b0ff267d..2687406d09e9 100644 --- a/numpy/_core/src/multiarray/multiarraymodule.c +++ b/numpy/_core/src/multiarray/multiarraymodule.c @@ -5144,6 +5144,13 @@ _multiarray_umath_exec(PyObject *m) { (PyObject *)&NpyBusDayCalendar_Type); set_flaginfo(d); + if (PyType_Ready(&PyArrayMethod_Type) < 0) { + return -1; + } + if (PyType_Ready(&PyBoundArrayMethod_Type) < 
0) { + return -1; + } + /* Finalize scalar types and expose them via namespace or typeinfo dict */ if (set_typeinfo(d) != 0) { return -1; @@ -5163,12 +5170,6 @@ _multiarray_umath_exec(PyObject *m) { d, "_array_converter", (PyObject *)&PyArrayArrayConverter_Type); - if (PyType_Ready(&PyArrayMethod_Type) < 0) { - return -1; - } - if (PyType_Ready(&PyBoundArrayMethod_Type) < 0) { - return -1; - } if (initialize_and_map_pytypes_to_dtypes() < 0) { return -1; } diff --git a/numpy/_core/src/npysort/heapsort.cpp b/numpy/_core/src/npysort/heapsort.cpp index 492cd47262d8..8af640579142 100644 --- a/numpy/_core/src/npysort/heapsort.cpp +++ b/numpy/_core/src/npysort/heapsort.cpp @@ -164,379 +164,3 @@ npy_aheapsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) return 0; } - -/*************************************** - * C > C++ dispatch - ***************************************/ -template NPY_NO_EXPORT int -heapsort_(npy_bool *, npy_intp); -NPY_NO_EXPORT int -heapsort_bool(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_bool *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_byte *, npy_intp); -NPY_NO_EXPORT int -heapsort_byte(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_byte *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_ubyte *, npy_intp); -NPY_NO_EXPORT int -heapsort_ubyte(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_ubyte *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_short *, npy_intp); -NPY_NO_EXPORT int -heapsort_short(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_short *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_ushort *, npy_intp); -NPY_NO_EXPORT int -heapsort_ushort(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_ushort *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_int *, npy_intp); -NPY_NO_EXPORT int -heapsort_int(void *start, npy_intp n, void 
*NPY_UNUSED(varr)) -{ - return heapsort_((npy_int *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_uint *, npy_intp); -NPY_NO_EXPORT int -heapsort_uint(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_uint *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_long *, npy_intp); -NPY_NO_EXPORT int -heapsort_long(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_long *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_ulong *, npy_intp); -NPY_NO_EXPORT int -heapsort_ulong(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_ulong *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_longlong *, npy_intp); -NPY_NO_EXPORT int -heapsort_longlong(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_longlong *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_ulonglong *, npy_intp); -NPY_NO_EXPORT int -heapsort_ulonglong(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_ulonglong *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_half *, npy_intp); -NPY_NO_EXPORT int -heapsort_half(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_half *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_float *, npy_intp); -NPY_NO_EXPORT int -heapsort_float(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_float *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_double *, npy_intp); -NPY_NO_EXPORT int -heapsort_double(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_double *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_longdouble *, npy_intp); -NPY_NO_EXPORT int -heapsort_longdouble(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_longdouble *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_cfloat *, npy_intp); -NPY_NO_EXPORT int -heapsort_cfloat(void *start, 
npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_cfloat *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_cdouble *, npy_intp); -NPY_NO_EXPORT int -heapsort_cdouble(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_cdouble *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_clongdouble *, npy_intp); -NPY_NO_EXPORT int -heapsort_clongdouble(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_clongdouble *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_datetime *, npy_intp); -NPY_NO_EXPORT int -heapsort_datetime(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_datetime *)start, n); -} - -template NPY_NO_EXPORT int -heapsort_(npy_timedelta *, npy_intp); -NPY_NO_EXPORT int -heapsort_timedelta(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return heapsort_((npy_timedelta *)start, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_bool *vv, npy_intp *tosort, - npy_intp n); -NPY_NO_EXPORT int -aheapsort_bool(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_bool *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_byte *vv, npy_intp *tosort, - npy_intp n); -NPY_NO_EXPORT int -aheapsort_byte(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_byte *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_ubyte *vv, npy_intp *tosort, - npy_intp n); -NPY_NO_EXPORT int -aheapsort_ubyte(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_ubyte *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_short *vv, npy_intp *tosort, - npy_intp n); -NPY_NO_EXPORT int -aheapsort_short(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_short *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_ushort *vv, npy_intp *tosort, - npy_intp n); 
-NPY_NO_EXPORT int -aheapsort_ushort(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_ushort *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_int *vv, npy_intp *tosort, npy_intp n); -NPY_NO_EXPORT int -aheapsort_int(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_int *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_uint *vv, npy_intp *tosort, - npy_intp n); -NPY_NO_EXPORT int -aheapsort_uint(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_uint *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_long *vv, npy_intp *tosort, - npy_intp n); -NPY_NO_EXPORT int -aheapsort_long(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_long *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_ulong *vv, npy_intp *tosort, - npy_intp n); -NPY_NO_EXPORT int -aheapsort_ulong(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_ulong *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_longlong *vv, npy_intp *tosort, - npy_intp n); -NPY_NO_EXPORT int -aheapsort_longlong(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_longlong *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_ulonglong *vv, - npy_intp *tosort, npy_intp n); -NPY_NO_EXPORT int -aheapsort_ulonglong(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_ulonglong *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_half *vv, npy_intp *tosort, - npy_intp n); -NPY_NO_EXPORT int -aheapsort_half(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_half *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_float *vv, npy_intp *tosort, - npy_intp n); -NPY_NO_EXPORT int 
-aheapsort_float(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_float *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_double *vv, npy_intp *tosort, - npy_intp n); -NPY_NO_EXPORT int -aheapsort_double(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_double *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_longdouble *vv, - npy_intp *tosort, npy_intp n); -NPY_NO_EXPORT int -aheapsort_longdouble(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_longdouble *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_cfloat *vv, npy_intp *tosort, - npy_intp n); -NPY_NO_EXPORT int -aheapsort_cfloat(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_cfloat *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_cdouble *vv, npy_intp *tosort, - npy_intp n); -NPY_NO_EXPORT int -aheapsort_cdouble(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_cdouble *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_clongdouble *vv, - npy_intp *tosort, - npy_intp n); -NPY_NO_EXPORT int -aheapsort_clongdouble(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_clongdouble *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_datetime *vv, npy_intp *tosort, - npy_intp n); -NPY_NO_EXPORT int -aheapsort_datetime(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_datetime *)vv, tosort, n); -} - -template NPY_NO_EXPORT int -aheapsort_(npy_timedelta *vv, - npy_intp *tosort, npy_intp n); -NPY_NO_EXPORT int -aheapsort_timedelta(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aheapsort_((npy_timedelta *)vv, tosort, n); -} - -NPY_NO_EXPORT int -heapsort_string(void *start, npy_intp n, 
void *varr) -{ - return string_heapsort_((npy_char *)start, n, varr); -} -NPY_NO_EXPORT int -heapsort_unicode(void *start, npy_intp n, void *varr) -{ - return string_heapsort_((npy_ucs4 *)start, n, varr); -} - -NPY_NO_EXPORT int -aheapsort_string(void *vv, npy_intp *tosort, npy_intp n, void *varr) -{ - return string_aheapsort_((npy_char *)vv, tosort, n, varr); -} -NPY_NO_EXPORT int -aheapsort_unicode(void *vv, npy_intp *tosort, npy_intp n, void *varr) -{ - return string_aheapsort_((npy_ucs4 *)vv, tosort, n, - varr); -} diff --git a/numpy/_core/src/npysort/heapsort.hpp b/numpy/_core/src/npysort/heapsort.hpp index 2bdba6781ba3..c63bb875cc69 100644 --- a/numpy/_core/src/npysort/heapsort.hpp +++ b/numpy/_core/src/npysort/heapsort.hpp @@ -5,23 +5,41 @@ namespace np::sort { -template -inline bool LessThan(const T &a, const T &b) +// Strict-weak-less comparator: returns true iff ``a`` sorts before ``b``. +// ``reverse=true`` flips the relational test (so it really means "greater"), +// while keeping NaN-at-end semantics for floating-point types. +template +constexpr bool Cmp(const T &a, const T &b) { if constexpr (std::is_floating_point_v) { - return a < b || (b != b && a == a); + if constexpr (reverse) { + return a > b || (b != b && a == a); + } + else { + return a < b || (b != b && a == a); + } } else if constexpr(std::is_same_v) { bool a_nn = !a.IsNaN(); - return b.IsNaN() ? a_nn : a_nn && a.Less(b); + if constexpr (reverse) { + return b.IsNaN() ? a_nn : a_nn && b.Less(a); + } + else { + return b.IsNaN() ? 
a_nn : a_nn && a.Less(b); + } } else { - return a < b; + if constexpr (reverse) { + return a > b; + } + else { + return a < b; + } } } // NUMERIC SORTS -template +template inline void Heap(T *start, SSize n) { SSize i, j, l; @@ -31,10 +49,10 @@ inline void Heap(T *start, SSize n) for (l = n >> 1; l > 0; --l) { tmp = a[l]; for (i = l, j = l << 1; j <= n;) { - if (j < n && LessThan(a[j], a[j + 1])) { + if (j < n && Cmp(a[j], a[j + 1])) { j += 1; } - if (LessThan(tmp, a[j])) { + if (Cmp(tmp, a[j])) { a[i] = a[j]; i = j; j += j; @@ -51,10 +69,10 @@ inline void Heap(T *start, SSize n) a[n] = a[1]; n -= 1; for (i = 1, j = 2; j <= n;) { - if (j < n && LessThan(a[j], a[j + 1])) { + if (j < n && Cmp(a[j], a[j + 1])) { j++; } - if (LessThan(tmp, a[j])) { + if (Cmp(tmp, a[j])) { a[i] = a[j]; i = j; j += j; diff --git a/numpy/_core/src/npysort/highway_qsort.dispatch.cpp b/numpy/_core/src/npysort/highway_qsort.dispatch.cpp index 2893e817af08..4a6ab1f0c9ad 100644 --- a/numpy/_core/src/npysort/highway_qsort.dispatch.cpp +++ b/numpy/_core/src/npysort/highway_qsort.dispatch.cpp @@ -3,25 +3,35 @@ #include "hwy/contrib/sort/vqsort-inl.h" #include "highway_qsort.hpp" -#include "quicksort.hpp" +#include "quicksort_generic.hpp" namespace np::highway::qsort_simd { template -void NPY_CPU_DISPATCH_CURFX(QSort)(T *arr, npy_intp size) +void NPY_CPU_DISPATCH_CURFX(QSort)(T *arr, npy_intp size, bool reverse) { #if VQSORT_ENABLED - hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending()); + if (reverse) { + hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortDescending()); + } + else { + hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending()); + } #else - sort::Quick(arr, size); + if (reverse) { + sort::Quick(arr, size); + } + else { + sort::Quick(arr, size); + } #endif } -template void NPY_CPU_DISPATCH_CURFX(QSort)(int32_t*, npy_intp); -template void NPY_CPU_DISPATCH_CURFX(QSort)(uint32_t*, npy_intp); -template void NPY_CPU_DISPATCH_CURFX(QSort)(int64_t*, npy_intp); 
-template void NPY_CPU_DISPATCH_CURFX(QSort)(uint64_t*, npy_intp); -template void NPY_CPU_DISPATCH_CURFX(QSort)(float*, npy_intp); -template void NPY_CPU_DISPATCH_CURFX(QSort)(double*, npy_intp); +template void NPY_CPU_DISPATCH_CURFX(QSort)(int32_t*, npy_intp, bool); +template void NPY_CPU_DISPATCH_CURFX(QSort)(uint32_t*, npy_intp, bool); +template void NPY_CPU_DISPATCH_CURFX(QSort)(int64_t*, npy_intp, bool); +template void NPY_CPU_DISPATCH_CURFX(QSort)(uint64_t*, npy_intp, bool); +template void NPY_CPU_DISPATCH_CURFX(QSort)(float*, npy_intp, bool); +template void NPY_CPU_DISPATCH_CURFX(QSort)(double*, npy_intp, bool); } // np::highway::qsort_simd diff --git a/numpy/_core/src/npysort/highway_qsort.hpp b/numpy/_core/src/npysort/highway_qsort.hpp index 371f2c2fbe7d..55de984e946d 100644 --- a/numpy/_core/src/npysort/highway_qsort.hpp +++ b/numpy/_core/src/npysort/highway_qsort.hpp @@ -6,10 +6,10 @@ namespace np::highway::qsort_simd { #include "highway_qsort.dispatch.h" -NPY_CPU_DISPATCH_DECLARE(template void QSort, (T *arr, npy_intp size)) +NPY_CPU_DISPATCH_DECLARE(template void QSort, (T *arr, npy_intp size, bool reverse)) #include "highway_qsort_16bit.dispatch.h" -NPY_CPU_DISPATCH_DECLARE(template void QSort, (T *arr, npy_intp size)) +NPY_CPU_DISPATCH_DECLARE(template void QSort, (T *arr, npy_intp size, bool reverse)) } // np::highway::qsort_simd diff --git a/numpy/_core/src/npysort/highway_qsort_16bit.dispatch.cpp b/numpy/_core/src/npysort/highway_qsort_16bit.dispatch.cpp index a7466709654d..9ced88124307 100644 --- a/numpy/_core/src/npysort/highway_qsort_16bit.dispatch.cpp +++ b/numpy/_core/src/npysort/highway_qsort_16bit.dispatch.cpp @@ -3,31 +3,51 @@ #include "hwy/contrib/sort/vqsort-inl.h" #include "highway_qsort.hpp" -#include "quicksort.hpp" +#include "quicksort_generic.hpp" namespace np::highway::qsort_simd { template -void NPY_CPU_DISPATCH_CURFX(QSort)(T *arr, npy_intp size) +void NPY_CPU_DISPATCH_CURFX(QSort)(T *arr, npy_intp size, bool reverse) { #if 
VQSORT_ENABLED using THwy = std::conditional_t, hwy::float16_t, T>; - hwy::HWY_NAMESPACE::VQSortStatic(reinterpret_cast(arr), size, hwy::SortAscending()); + if (reverse) { + hwy::HWY_NAMESPACE::VQSortStatic(reinterpret_cast(arr), size, hwy::SortDescending()); + } + else { + hwy::HWY_NAMESPACE::VQSortStatic(reinterpret_cast(arr), size, hwy::SortAscending()); + } #else - sort::Quick(arr, size); + if (reverse) { + sort::Quick(arr, size); + } + else { + sort::Quick(arr, size); + } #endif } + #if !HWY_HAVE_FLOAT16 +// Highway's float16 vector sort isn't compiled in; provide a scalar +// specialization so ``Half`` still has a working symbol at link time and +// the primary template body above doesn't try to instantiate +// ``VQSortStatic``. template <> -void NPY_CPU_DISPATCH_CURFX(QSort)(Half *arr, npy_intp size) +void NPY_CPU_DISPATCH_CURFX(QSort)(Half *arr, npy_intp size, bool reverse) { - sort::Quick(arr, size); + if (reverse) { + sort::Quick(arr, size); + } + else { + sort::Quick(arr, size); + } } #endif // !HWY_HAVE_FLOAT16 -template void NPY_CPU_DISPATCH_CURFX(QSort)(int16_t*, npy_intp); -template void NPY_CPU_DISPATCH_CURFX(QSort)(uint16_t*, npy_intp); +template void NPY_CPU_DISPATCH_CURFX(QSort)(int16_t*, npy_intp, bool); +template void NPY_CPU_DISPATCH_CURFX(QSort)(uint16_t*, npy_intp, bool); #if HWY_HAVE_FLOAT16 -template void NPY_CPU_DISPATCH_CURFX(QSort)(Half*, npy_intp); +template void NPY_CPU_DISPATCH_CURFX(QSort)(Half*, npy_intp, bool); #endif } // np::highway::qsort_simd diff --git a/numpy/_core/src/npysort/mergesort.cpp b/numpy/_core/src/npysort/mergesort.cpp index 1cfe04b1d266..07094f209aee 100644 --- a/numpy/_core/src/npysort/mergesort.cpp +++ b/numpy/_core/src/npysort/mergesort.cpp @@ -46,7 +46,7 @@ ***************************************************************************** */ -template +template static void mergesort0_(type *pl, type *pr, type *pw) { @@ -55,8 +55,8 @@ mergesort0_(type *pl, type *pr, type *pw) if (pr - pl > SMALL_MERGESORT) { /* 
merge sort */ pm = pl + ((pr - pl) >> 1); - mergesort0_(pl, pm, pw); - mergesort0_(pm, pr, pw); + mergesort0_(pl, pm, pw); + mergesort0_(pm, pr, pw); for (pi = pw, pj = pl; pj < pm;) { *pi++ = *pj++; } @@ -64,7 +64,7 @@ mergesort0_(type *pl, type *pr, type *pw) pj = pw; pk = pl; while (pj < pi && pm < pr) { - if (Tag::less(*pm, *pj)) { + if (npy::cmp(*pm, *pj)) { *pk++ = *pm++; } else { @@ -81,7 +81,7 @@ mergesort0_(type *pl, type *pr, type *pw) vp = *pi; pj = pi; pk = pi - 1; - while (pj > pl && Tag::less(vp, *pk)) { + while (pj > pl && npy::cmp(vp, *pk)) { *pj-- = *pk--; } *pj = vp; @@ -89,7 +89,7 @@ mergesort0_(type *pl, type *pr, type *pw) } } -template +template NPY_NO_EXPORT int mergesort_(type *start, npy_intp num) { @@ -101,13 +101,13 @@ mergesort_(type *start, npy_intp num) if (pw == NULL) { return -NPY_ENOMEM; } - mergesort0_(pl, pr, pw); + mergesort0_(pl, pr, pw); free(pw); return 0; } -template +template static void amergesort0_(npy_intp *pl, npy_intp *pr, type *v, npy_intp *pw) { @@ -117,8 +117,8 @@ amergesort0_(npy_intp *pl, npy_intp *pr, type *v, npy_intp *pw) if (pr - pl > SMALL_MERGESORT) { /* merge sort */ pm = pl + ((pr - pl) >> 1); - amergesort0_(pl, pm, v, pw); - amergesort0_(pm, pr, v, pw); + amergesort0_(pl, pm, v, pw); + amergesort0_(pm, pr, v, pw); for (pi = pw, pj = pl; pj < pm;) { *pi++ = *pj++; } @@ -126,7 +126,7 @@ amergesort0_(npy_intp *pl, npy_intp *pr, type *v, npy_intp *pw) pj = pw; pk = pl; while (pj < pi && pm < pr) { - if (Tag::less(v[*pm], v[*pj])) { + if (npy::cmp(v[*pm], v[*pj])) { *pk++ = *pm++; } else { @@ -144,7 +144,7 @@ amergesort0_(npy_intp *pl, npy_intp *pr, type *v, npy_intp *pw) vp = v[vi]; pj = pi; pk = pi - 1; - while (pj > pl && Tag::less(vp, v[*pk])) { + while (pj > pl && npy::cmp(vp, v[*pk])) { *pj-- = *pk--; } *pj = vi; @@ -152,7 +152,7 @@ amergesort0_(npy_intp *pl, npy_intp *pr, type *v, npy_intp *pw) } } -template +template NPY_NO_EXPORT int amergesort_(type *v, npy_intp *tosort, npy_intp num) { @@ -164,7 
+164,7 @@ amergesort_(type *v, npy_intp *tosort, npy_intp num) if (pw == NULL) { return -NPY_ENOMEM; } - amergesort0_(pl, pr, v, pw); + amergesort0_(pl, pr, v, pw); free(pw); return 0; @@ -177,7 +177,7 @@ amergesort_(type *v, npy_intp *tosort, npy_intp num) ***************************************************************************** */ -template +template static void mergesort0_(type *pl, type *pr, type *pw, type *vp, size_t len) { @@ -186,14 +186,14 @@ mergesort0_(type *pl, type *pr, type *pw, type *vp, size_t len) if ((size_t)(pr - pl) > SMALL_MERGESORT * len) { /* merge sort */ pm = pl + (((pr - pl) / len) >> 1) * len; - mergesort0_(pl, pm, pw, vp, len); - mergesort0_(pm, pr, pw, vp, len); + mergesort0_(pl, pm, pw, vp, len); + mergesort0_(pm, pr, pw, vp, len); Tag::copy(pw, pl, pm - pl); pi = pw + (pm - pl); pj = pw; pk = pl; while (pj < pi && pm < pr) { - if (Tag::less(pm, pj, len)) { + if (npy::cmp(pm, pj, len)) { Tag::copy(pk, pm, len); pm += len; pk += len; @@ -212,7 +212,7 @@ mergesort0_(type *pl, type *pr, type *pw, type *vp, size_t len) Tag::copy(vp, pi, len); pj = pi; pk = pi - len; - while (pj > pl && Tag::less(vp, pk, len)) { + while (pj > pl && npy::cmp(vp, pk, len)) { Tag::copy(pj, pk, len); pj -= len; pk -= len; @@ -222,12 +222,10 @@ mergesort0_(type *pl, type *pr, type *pw, type *vp, size_t len) } } -template +template static int -string_mergesort_(type *start, npy_intp num, void *varr) +string_mergesort_(type *start, npy_intp num, int elsize) { - PyArrayObject *arr = (PyArrayObject *)varr; - size_t elsize = PyArray_ITEMSIZE(arr); size_t len = elsize / sizeof(type); type *pl, *pr, *pw, *vp; int err = 0; @@ -249,7 +247,7 @@ string_mergesort_(type *start, npy_intp num, void *varr) err = -NPY_ENOMEM; goto fail_1; } - mergesort0_(pl, pr, pw, vp, len); + mergesort0_(pl, pr, pw, vp, len); free(vp); fail_1: @@ -258,7 +256,7 @@ string_mergesort_(type *start, npy_intp num, void *varr) return err; } -template +template static void amergesort0_(npy_intp *pl, 
npy_intp *pr, type *v, npy_intp *pw, size_t len) { @@ -268,8 +266,8 @@ amergesort0_(npy_intp *pl, npy_intp *pr, type *v, npy_intp *pw, size_t len) if (pr - pl > SMALL_MERGESORT) { /* merge sort */ pm = pl + ((pr - pl) >> 1); - amergesort0_(pl, pm, v, pw, len); - amergesort0_(pm, pr, v, pw, len); + amergesort0_(pl, pm, v, pw, len); + amergesort0_(pm, pr, v, pw, len); for (pi = pw, pj = pl; pj < pm;) { *pi++ = *pj++; } @@ -277,7 +275,7 @@ amergesort0_(npy_intp *pl, npy_intp *pr, type *v, npy_intp *pw, size_t len) pj = pw; pk = pl; while (pj < pi && pm < pr) { - if (Tag::less(v + (*pm) * len, v + (*pj) * len, len)) { + if (npy::cmp(v + (*pm) * len, v + (*pj) * len, len)) { *pk++ = *pm++; } else { @@ -295,7 +293,7 @@ amergesort0_(npy_intp *pl, npy_intp *pr, type *v, npy_intp *pw, size_t len) vp = v + vi * len; pj = pi; pk = pi - 1; - while (pj > pl && Tag::less(vp, v + (*pk) * len, len)) { + while (pj > pl && npy::cmp(vp, v + (*pk) * len, len)) { *pj-- = *pk--; } *pj = vi; @@ -303,7 +301,7 @@ amergesort0_(npy_intp *pl, npy_intp *pr, type *v, npy_intp *pw, size_t len) } } -template +template static int string_amergesort_(type *v, npy_intp *tosort, npy_intp num, void *varr) { @@ -323,7 +321,7 @@ string_amergesort_(type *v, npy_intp *tosort, npy_intp num, void *varr) if (pw == NULL) { return -NPY_ENOMEM; } - amergesort0_(pl, pr, v, pw, len); + amergesort0_(pl, pr, v, pw, len); free(pw); return 0; @@ -500,255 +498,3 @@ npy_amergesort_impl(void *v, npy_intp *tosort, npy_intp num, void *varr, return 0; } - -/*************************************** - * C > C++ dispatch - ***************************************/ -NPY_NO_EXPORT int -mergesort_bool(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_bool *)start, num); -} -NPY_NO_EXPORT int -mergesort_byte(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_byte *)start, num); -} -NPY_NO_EXPORT int -mergesort_ubyte(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - 
return mergesort_((npy_ubyte *)start, num); -} -NPY_NO_EXPORT int -mergesort_short(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_short *)start, num); -} -NPY_NO_EXPORT int -mergesort_ushort(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_ushort *)start, num); -} -NPY_NO_EXPORT int -mergesort_int(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_int *)start, num); -} -NPY_NO_EXPORT int -mergesort_uint(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_uint *)start, num); -} -NPY_NO_EXPORT int -mergesort_long(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_long *)start, num); -} -NPY_NO_EXPORT int -mergesort_ulong(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_ulong *)start, num); -} -NPY_NO_EXPORT int -mergesort_longlong(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_longlong *)start, num); -} -NPY_NO_EXPORT int -mergesort_ulonglong(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_ulonglong *)start, num); -} -NPY_NO_EXPORT int -mergesort_half(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_half *)start, num); -} -NPY_NO_EXPORT int -mergesort_float(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_float *)start, num); -} -NPY_NO_EXPORT int -mergesort_double(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_double *)start, num); -} -NPY_NO_EXPORT int -mergesort_longdouble(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_longdouble *)start, num); -} -NPY_NO_EXPORT int -mergesort_cfloat(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_cfloat *)start, num); -} -NPY_NO_EXPORT int -mergesort_cdouble(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_cdouble 
*)start, num); -} -NPY_NO_EXPORT int -mergesort_clongdouble(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_clongdouble *)start, num); -} -NPY_NO_EXPORT int -mergesort_datetime(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_datetime *)start, num); -} -NPY_NO_EXPORT int -mergesort_timedelta(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return mergesort_((npy_timedelta *)start, num); -} - -NPY_NO_EXPORT int -amergesort_bool(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_bool *)start, tosort, num); -} -NPY_NO_EXPORT int -amergesort_byte(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_byte *)start, tosort, num); -} -NPY_NO_EXPORT int -amergesort_ubyte(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_ubyte *)start, tosort, num); -} -NPY_NO_EXPORT int -amergesort_short(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_short *)start, tosort, num); -} -NPY_NO_EXPORT int -amergesort_ushort(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_ushort *)start, tosort, num); -} -NPY_NO_EXPORT int -amergesort_int(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_int *)start, tosort, num); -} -NPY_NO_EXPORT int -amergesort_uint(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_uint *)start, tosort, num); -} -NPY_NO_EXPORT int -amergesort_long(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_long *)start, tosort, num); -} -NPY_NO_EXPORT int -amergesort_ulong(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_ulong *)start, tosort, num); -} -NPY_NO_EXPORT int 
-amergesort_longlong(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_longlong *)start, tosort, num); -} -NPY_NO_EXPORT int -amergesort_ulonglong(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_ulonglong *)start, tosort, - num); -} -NPY_NO_EXPORT int -amergesort_half(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_half *)start, tosort, num); -} -NPY_NO_EXPORT int -amergesort_float(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_float *)start, tosort, num); -} -NPY_NO_EXPORT int -amergesort_double(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_double *)start, tosort, num); -} -NPY_NO_EXPORT int -amergesort_longdouble(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_longdouble *)start, tosort, - num); -} -NPY_NO_EXPORT int -amergesort_cfloat(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_cfloat *)start, tosort, num); -} -NPY_NO_EXPORT int -amergesort_cdouble(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_cdouble *)start, tosort, num); -} -NPY_NO_EXPORT int -amergesort_clongdouble(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_clongdouble *)start, tosort, - num); -} -NPY_NO_EXPORT int -amergesort_datetime(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_datetime *)start, tosort, num); -} -NPY_NO_EXPORT int -amergesort_timedelta(void *start, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return amergesort_((npy_timedelta *)start, tosort, - num); -} - -NPY_NO_EXPORT int -mergesort_string(void *start, npy_intp num, void *varr) -{ - return 
string_mergesort_((npy_char *)start, num, varr); -} -NPY_NO_EXPORT int -mergesort_unicode(void *start, npy_intp num, void *varr) -{ - return string_mergesort_((npy_ucs4 *)start, num, varr); -} -NPY_NO_EXPORT int -amergesort_string(void *v, npy_intp *tosort, npy_intp num, void *varr) -{ - return string_amergesort_((npy_char *)v, tosort, num, - varr); -} -NPY_NO_EXPORT int -amergesort_unicode(void *v, npy_intp *tosort, npy_intp num, void *varr) -{ - return string_amergesort_((npy_ucs4 *)v, tosort, num, - varr); -} diff --git a/numpy/_core/src/npysort/npysort_heapsort.h b/numpy/_core/src/npysort/npysort_heapsort.h index 16750b817382..a938147f65e1 100644 --- a/numpy/_core/src/npysort/npysort_heapsort.h +++ b/numpy/_core/src/npysort/npysort_heapsort.h @@ -15,7 +15,7 @@ ***************************************************************************** */ -template +template inline NPY_NO_EXPORT int heapsort_(type *start, npy_intp n) { @@ -28,10 +28,10 @@ int heapsort_(type *start, npy_intp n) for (l = n >> 1; l > 0; --l) { tmp = a[l]; for (i = l, j = l << 1; j <= n;) { - if (j < n && Tag::less(a[j], a[j + 1])) { + if (j < n && npy::cmp(a[j], a[j + 1])) { j += 1; } - if (Tag::less(tmp, a[j])) { + if (npy::cmp(tmp, a[j])) { a[i] = a[j]; i = j; j += j; @@ -48,10 +48,10 @@ int heapsort_(type *start, npy_intp n) a[n] = a[1]; n -= 1; for (i = 1, j = 2; j <= n;) { - if (j < n && Tag::less(a[j], a[j + 1])) { + if (j < n && npy::cmp(a[j], a[j + 1])) { j++; } - if (Tag::less(tmp, a[j])) { + if (npy::cmp(tmp, a[j])) { a[i] = a[j]; i = j; j += j; @@ -66,7 +66,15 @@ int heapsort_(type *start, npy_intp n) return 0; } -template +// ``PyArray_SortFunc``-shaped trampoline. 
+template +inline NPY_NO_EXPORT int +heapsort_impl(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_((type *)start, n); +} + +template inline NPY_NO_EXPORT int aheapsort_(type *vv, npy_intp *tosort, npy_intp n) { @@ -78,10 +86,10 @@ int aheapsort_(type *vv, npy_intp *tosort, npy_intp n) for (l = n >> 1; l > 0; --l) { tmp = a[l]; for (i = l, j = l << 1; j <= n;) { - if (j < n && Tag::less(v[a[j]], v[a[j + 1]])) { + if (j < n && npy::cmp(v[a[j]], v[a[j + 1]])) { j += 1; } - if (Tag::less(v[tmp], v[a[j]])) { + if (npy::cmp(v[tmp], v[a[j]])) { a[i] = a[j]; i = j; j += j; @@ -98,10 +106,10 @@ int aheapsort_(type *vv, npy_intp *tosort, npy_intp n) a[n] = a[1]; n -= 1; for (i = 1, j = 2; j <= n;) { - if (j < n && Tag::less(v[a[j]], v[a[j + 1]])) { + if (j < n && npy::cmp(v[a[j]], v[a[j + 1]])) { j++; } - if (Tag::less(v[tmp], v[a[j]])) { + if (npy::cmp(v[tmp], v[a[j]])) { a[i] = a[j]; i = j; j += j; @@ -116,23 +124,31 @@ int aheapsort_(type *vv, npy_intp *tosort, npy_intp n) return 0; } +// ``PyArray_ArgSortFunc``-shaped trampoline. 
+template +inline NPY_NO_EXPORT int +aheapsort_impl(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aheapsort_((type *)vv, tosort, n); +} + /* ***************************************************************************** ** STRING SORTS ** ***************************************************************************** */ -template +template inline NPY_NO_EXPORT -int string_heapsort_(type *start, npy_intp n, void *varr) +int string_heapsort_(type *start, npy_intp n, int elsize) { - PyArrayObject *arr = (PyArrayObject *)varr; - size_t len = PyArray_ITEMSIZE(arr) / sizeof(type); + size_t len = elsize / sizeof(type); if (len == 0) { return 0; /* no need for sorting if strings are empty */ } - type *tmp = (type *)malloc(PyArray_ITEMSIZE(arr)); + type *tmp = (type *)malloc(elsize); type *a = (type *)start - len; npy_intp i, j, l; @@ -143,9 +159,9 @@ int string_heapsort_(type *start, npy_intp n, void *varr) for (l = n >> 1; l > 0; --l) { Tag::copy(tmp, a + l * len, len); for (i = l, j = l << 1; j <= n;) { - if (j < n && Tag::less(a + j * len, a + (j + 1) * len, len)) + if (j < n && npy::cmp(a + j * len, a + (j + 1) * len, len)) j += 1; - if (Tag::less(tmp, a + j * len, len)) { + if (npy::cmp(tmp, a + j * len, len)) { Tag::copy(a + i * len, a + j * len, len); i = j; j += j; @@ -162,9 +178,9 @@ int string_heapsort_(type *start, npy_intp n, void *varr) Tag::copy(a + n * len, a + len, len); n -= 1; for (i = 1, j = 2; j <= n;) { - if (j < n && Tag::less(a + j * len, a + (j + 1) * len, len)) + if (j < n && npy::cmp(a + j * len, a + (j + 1) * len, len)) j++; - if (Tag::less(tmp, a + j * len, len)) { + if (npy::cmp(tmp, a + j * len, len)) { Tag::copy(a + i * len, a + j * len, len); i = j; j += j; @@ -180,13 +196,12 @@ int string_heapsort_(type *start, npy_intp n, void *varr) return 0; } -template +template inline NPY_NO_EXPORT -int string_aheapsort_(type *vv, npy_intp *tosort, npy_intp n, void *varr) +int string_aheapsort_(type *vv, npy_intp *tosort, 
npy_intp n, int elsize) { type *v = vv; - PyArrayObject *arr = (PyArrayObject *)varr; - size_t len = PyArray_ITEMSIZE(arr) / sizeof(type); + size_t len = elsize / sizeof(type); npy_intp *a, i, j, l, tmp; /* The array needs to be offset by one for heapsort indexing */ @@ -195,9 +210,9 @@ int string_aheapsort_(type *vv, npy_intp *tosort, npy_intp n, void *varr) for (l = n >> 1; l > 0; --l) { tmp = a[l]; for (i = l, j = l << 1; j <= n;) { - if (j < n && Tag::less(v + a[j] * len, v + a[j + 1] * len, len)) + if (j < n && npy::cmp(v + a[j] * len, v + a[j + 1] * len, len)) j += 1; - if (Tag::less(v + tmp * len, v + a[j] * len, len)) { + if (npy::cmp(v + tmp * len, v + a[j] * len, len)) { a[i] = a[j]; i = j; j += j; @@ -214,9 +229,9 @@ int string_aheapsort_(type *vv, npy_intp *tosort, npy_intp n, void *varr) a[n] = a[1]; n -= 1; for (i = 1, j = 2; j <= n;) { - if (j < n && Tag::less(v + a[j] * len, v + a[j + 1] * len, len)) + if (j < n && npy::cmp(v + a[j] * len, v + a[j + 1] * len, len)) j++; - if (Tag::less(v + tmp * len, v + a[j] * len, len)) { + if (npy::cmp(v + tmp * len, v + a[j] * len, len)) { a[i] = a[j]; i = j; j += j; diff --git a/numpy/_core/src/npysort/npysort_methods.cpp b/numpy/_core/src/npysort/npysort_methods.cpp new file mode 100644 index 000000000000..62a50d07d8bb --- /dev/null +++ b/numpy/_core/src/npysort/npysort_methods.cpp @@ -0,0 +1,358 @@ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "npy_sort.h" +#include "npysort_common.h" +#include "numpy_tag.h" +#include "gil_utils.h" +#include "quicksort.hpp" +#include "radixsort.hpp" +#include "timsort.hpp" + +#include + +static NPY_CASTING +sort_resolve_descriptors(PyArrayMethodObject *method, PyArray_DTypeMeta *const *dtypes, + PyArray_Descr *const *input_descrs, + PyArray_Descr **output_descrs, npy_intp *view_offset) +{ + output_descrs[0] = NPY_DT_CALL_ensure_canonical(input_descrs[0]); + if (NPY_UNLIKELY(output_descrs[0] == NULL)) { + return _NPY_ERROR_OCCURRED_IN_CAST; + } + 
Py_INCREF(output_descrs[0]); + output_descrs[1] = output_descrs[0]; + + return method->casting; +} + +static NPY_CASTING +argsort_resolve_descriptors(PyArrayMethodObject *method, + PyArray_DTypeMeta *const *dtypes, + PyArray_Descr *const *input_descrs, + PyArray_Descr **output_descrs, npy_intp *view_offset) +{ + output_descrs[0] = NPY_DT_CALL_ensure_canonical(input_descrs[0]); + if (NPY_UNLIKELY(output_descrs[0] == NULL)) { + return _NPY_ERROR_OCCURRED_IN_CAST; + } + if (input_descrs[1] == NULL) { + output_descrs[1] = PyArray_DescrFromType(NPY_INTP); + } + else { + output_descrs[1] = NPY_DT_CALL_ensure_canonical(input_descrs[1]); + } + if (NPY_UNLIKELY(output_descrs[1] == NULL)) { + Py_XDECREF(output_descrs[0]); + return _NPY_ERROR_OCCURRED_IN_CAST; + } + + return method->casting; +} + +template +static int +sort_loop_(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *NPY_UNUSED(auxdata)) +{ + constexpr bool use_radixsort = ( + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v + ); + + PyArrayMethod_SortParameters *params = + (PyArrayMethod_SortParameters *)context->parameters; + switch ((int)params->flags) { + case NPY_SORT_DEFAULT: + return quicksort_((type *)data[0], dimensions[0]); + case NPY_SORT_STABLE: + if constexpr (use_radixsort) { + return radixsort(data[0], dimensions[0]); + } + else { + return timsort_((type *)data[0], dimensions[0]); + } + case NPY_SORT_DEFAULT | NPY_SORT_DESCENDING: + return quicksort_((type *)data[0], dimensions[0]); + case NPY_SORT_STABLE | NPY_SORT_DESCENDING: + return timsort_((type *)data[0], dimensions[0]); + default: + npy_gil_error(PyExc_RuntimeError, "unknown sort kind %d", + (int)params->flags); + return -1; + } +} + +template +static int +argsort_loop_(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *NPY_UNUSED(auxdata)) +{ + 
constexpr bool use_radixsort = ( + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v + ); + + PyArrayMethod_SortParameters *params = + (PyArrayMethod_SortParameters *)context->parameters; + switch ((int)params->flags) { + case NPY_SORT_DEFAULT: + return aquicksort_((type *)data[0], (npy_intp *)data[1], + dimensions[0]); + case NPY_SORT_STABLE: + if constexpr (use_radixsort) { + return aradixsort(data[0], (npy_intp *)data[1], dimensions[0]); + } + else { + return atimsort_((type *)data[0], (npy_intp *)data[1], + dimensions[0]); + } + case NPY_SORT_DEFAULT | NPY_SORT_DESCENDING: + return aquicksort_((type *)data[0], (npy_intp *)data[1], + dimensions[0]); + case NPY_SORT_STABLE | NPY_SORT_DESCENDING: + return atimsort_((type *)data[0], (npy_intp *)data[1], + dimensions[0]); + default: + npy_gil_error(PyExc_RuntimeError, "unknown sort kind %d", + (int)params->flags); + return -1; + } +} + +template +static int +sort_loop_string_(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *NPY_UNUSED(auxdata)) +{ + PyArrayMethod_SortParameters *params = + (PyArrayMethod_SortParameters *)context->parameters; + int elsize = context->descriptors[0]->elsize; + switch ((int)params->flags) { + case NPY_SORT_DEFAULT: + return string_quicksort_((type *)data[0], dimensions[0], + elsize); + case NPY_SORT_STABLE: + return string_timsort_((type *)data[0], dimensions[0], + elsize); + case NPY_SORT_DEFAULT | NPY_SORT_DESCENDING: + return string_quicksort_((type *)data[0], dimensions[0], + elsize); + case NPY_SORT_STABLE | NPY_SORT_DESCENDING: + return string_timsort_((type *)data[0], dimensions[0], + elsize); + default: + npy_gil_error(PyExc_RuntimeError, "unknown sort kind %d", + (int)params->flags); + return -1; + } +} + +template +static int +argsort_loop_string_(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + 
NpyAuxData *NPY_UNUSED(auxdata)) +{ + PyArrayMethod_SortParameters *params = + (PyArrayMethod_SortParameters *)context->parameters; + int elsize = context->descriptors[0]->elsize; + switch ((int)params->flags) { + case NPY_SORT_DEFAULT: + return string_aquicksort_( + (type *)data[0], (npy_intp *)data[1], dimensions[0], elsize); + case NPY_SORT_STABLE: + return string_atimsort_( + (type *)data[0], (npy_intp *)data[1], dimensions[0], elsize); + case NPY_SORT_DEFAULT | NPY_SORT_DESCENDING: + return string_aquicksort_( + (type *)data[0], (npy_intp *)data[1], dimensions[0], elsize); + case NPY_SORT_STABLE | NPY_SORT_DESCENDING: + return string_atimsort_( + (type *)data[0], (npy_intp *)data[1], dimensions[0], elsize); + default: + npy_gil_error(PyExc_RuntimeError, "unknown sort kind %d", + (int)params->flags); + return -1; + } +} + +template +NPY_NO_EXPORT int +make_sorts_(PyArray_DTypeMeta *dtypemeta, const char *name) +{ + using type = typename Tag::type; + constexpr bool use_radixsort = ( + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v + ); + + NPY_DT_SLOTS(dtypemeta)->f.sort[0] = quicksort_impl; + NPY_DT_SLOTS(dtypemeta)->f.sort[1] = heapsort_impl; + if constexpr (use_radixsort) { + NPY_DT_SLOTS(dtypemeta)->f.sort[2] = radixsort_impl; + } + else { + NPY_DT_SLOTS(dtypemeta)->f.sort[2] = timsort_impl; + } + + NPY_DT_SLOTS(dtypemeta)->f.argsort[0] = aquicksort_impl; + NPY_DT_SLOTS(dtypemeta)->f.argsort[1] = aheapsort_impl; + if constexpr (use_radixsort) { + NPY_DT_SLOTS(dtypemeta)->f.argsort[2] = aradixsort_impl; + } + else { + NPY_DT_SLOTS(dtypemeta)->f.argsort[2] = atimsort_impl; + } + + std::string sort_name = std::string(name) + "_sort"; + PyArray_DTypeMeta *sort_dtypes[2] = {dtypemeta, dtypemeta}; + PyType_Slot sort_slots[3] = { + {NPY_METH_resolve_descriptors, + reinterpret_cast(sort_resolve_descriptors)}, + {NPY_METH_strided_loop, reinterpret_cast(sort_loop_)}, + {0, NULL}}; + PyArrayMethod_Spec sort_spec = { + 
sort_name.c_str(), + 1, + 1, + NPY_NO_CASTING, + NPY_METH_NO_FLOATINGPOINT_ERRORS, + sort_dtypes, + sort_slots, + }; + PyBoundArrayMethodObject *sort_method = PyArrayMethod_FromSpec_int(&sort_spec, 1); + if (sort_method == NULL) { + return -1; + } + NPY_DT_SLOTS(dtypemeta)->sort_meth = sort_method->method; + Py_INCREF(sort_method->method); + Py_DECREF(sort_method); + + std::string argsort_name = std::string(name) + "_argsort"; + PyArray_DTypeMeta *argsort_dtypes[2] = {dtypemeta, &PyArray_IntpDType}; + PyType_Slot argsort_slots[3] = { + {NPY_METH_resolve_descriptors, + reinterpret_cast(argsort_resolve_descriptors)}, + {NPY_METH_strided_loop, reinterpret_cast(argsort_loop_)}, + {0, NULL}}; + PyArrayMethod_Spec argsort_spec = { + argsort_name.c_str(), + 1, + 1, + NPY_NO_CASTING, + NPY_METH_NO_FLOATINGPOINT_ERRORS, + argsort_dtypes, + argsort_slots, + }; + PyBoundArrayMethodObject *argsort_method = + PyArrayMethod_FromSpec_int(&argsort_spec, 1); + if (argsort_method == NULL) { + return -1; + } + NPY_DT_SLOTS(dtypemeta)->argsort_meth = argsort_method->method; + Py_INCREF(argsort_method->method); + Py_DECREF(argsort_method); + + return 0; +} + +template +NPY_NO_EXPORT int +make_string_sorts_(PyArray_DTypeMeta *dtypemeta, const char *name) +{ + using type = typename Tag::type; + + std::string sort_name = std::string(name) + "_sort"; + PyArray_DTypeMeta *sort_dtypes[2] = {dtypemeta, dtypemeta}; + PyType_Slot sort_slots[3] = { + {NPY_METH_resolve_descriptors, + reinterpret_cast(sort_resolve_descriptors)}, + {NPY_METH_strided_loop, + reinterpret_cast(sort_loop_string_)}, + {0, NULL}}; + PyArrayMethod_Spec sort_spec = { + sort_name.c_str(), + 1, + 1, + NPY_NO_CASTING, + NPY_METH_NO_FLOATINGPOINT_ERRORS, + sort_dtypes, + sort_slots, + }; + PyBoundArrayMethodObject *sort_method = PyArrayMethod_FromSpec_int(&sort_spec, 1); + if (sort_method == NULL) { + return -1; + } + NPY_DT_SLOTS(dtypemeta)->sort_meth = sort_method->method; + Py_INCREF(sort_method->method); + 
Py_DECREF(sort_method); + + std::string argsort_name = std::string(name) + "_argsort"; + PyArray_DTypeMeta *argsort_dtypes[2] = {dtypemeta, &PyArray_IntpDType}; + PyType_Slot argsort_slots[3] = { + {NPY_METH_resolve_descriptors, + reinterpret_cast(argsort_resolve_descriptors)}, + {NPY_METH_strided_loop, + reinterpret_cast(argsort_loop_string_)}, + {0, NULL}}; + PyArrayMethod_Spec argsort_spec = { + argsort_name.c_str(), + 1, + 1, + NPY_NO_CASTING, + NPY_METH_NO_FLOATINGPOINT_ERRORS, + argsort_dtypes, + argsort_slots, + }; + PyBoundArrayMethodObject *argsort_method = + PyArrayMethod_FromSpec_int(&argsort_spec, 1); + if (argsort_method == NULL) { + return -1; + } + NPY_DT_SLOTS(dtypemeta)->argsort_meth = argsort_method->method; + Py_INCREF(argsort_method->method); + Py_DECREF(argsort_method); + + return 0; +} + +int register_all_sorts() { + int r = 0; + + r += make_sorts_(&PyArray_BoolDType, "bool"); + r += make_sorts_(&PyArray_ByteDType, "byte"); + r += make_sorts_(&PyArray_UByteDType, "ubyte"); + r += make_sorts_(&PyArray_ShortDType, "short"); + r += make_sorts_(&PyArray_UShortDType, "ushort"); + r += make_sorts_(&PyArray_IntDType, "int"); + r += make_sorts_(&PyArray_UIntDType, "uint"); + r += make_sorts_(&PyArray_LongDType, "long"); + r += make_sorts_(&PyArray_ULongDType, "ulong"); + r += make_sorts_(&PyArray_LongLongDType, "longlong"); + r += make_sorts_(&PyArray_ULongLongDType, "ulonglong"); + r += make_sorts_(&PyArray_FloatDType, "float"); + r += make_sorts_(&PyArray_DoubleDType, "double"); + r += make_sorts_(&PyArray_LongDoubleDType, "longdouble"); + r += make_sorts_(&PyArray_CFloatDType, "cfloat"); + r += make_sorts_(&PyArray_CDoubleDType, "cdouble"); + r += make_sorts_(&PyArray_CLongDoubleDType, "clongdouble"); + r += make_sorts_(&PyArray_DatetimeDType, "datetime"); + r += make_sorts_(&PyArray_TimedeltaDType, "timedelta"); + r += make_string_sorts_(&PyArray_BytesDType, "string"); + r += make_string_sorts_(&PyArray_UnicodeDType, "unicode"); + r += 
make_sorts_(&PyArray_HalfDType, "half"); + + return r; +} diff --git a/numpy/_core/src/npysort/quicksort.cpp b/numpy/_core/src/npysort/quicksort.cpp deleted file mode 100644 index 3371c02aef49..000000000000 --- a/numpy/_core/src/npysort/quicksort.cpp +++ /dev/null @@ -1,1023 +0,0 @@ -/* -*- c -*- */ - -/* - * The purpose of this module is to add faster sort functions - * that are type-specific. This is done by altering the - * function table for the builtin descriptors. - * - * These sorting functions are copied almost directly from numarray - * with a few modifications (complex comparisons compare the imaginary - * part if the real parts are equal, for example), and the names - * are changed. - * - * The original sorting code is due to Charles R. Harris who wrote - * it for numarray. - */ - -/* - * Quick sort is usually the fastest, but the worst case scenario is O(N^2) so - * the code switches to the O(NlogN) worst case heapsort if not enough progress - * is made on the large side of the two quicksort partitions. This improves the - * worst case while still retaining the speed of quicksort for the common case. - * This is variant known as introsort. 
- * - * - * def introsort(lower, higher, recursion_limit=log2(higher - lower + 1) * 2): - * # sort remainder with heapsort if we are not making enough progress - * # we arbitrarily choose 2 * log(n) as the cutoff point - * if recursion_limit < 0: - * heapsort(lower, higher) - * return - * - * if lower < higher: - * pivot_pos = partition(lower, higher) - * # recurse into smaller first and leave larger on stack - * # this limits the required stack space - * if (pivot_pos - lower > higher - pivot_pos): - * quicksort(pivot_pos + 1, higher, recursion_limit - 1) - * quicksort(lower, pivot_pos, recursion_limit - 1) - * else: - * quicksort(lower, pivot_pos, recursion_limit - 1) - * quicksort(pivot_pos + 1, higher, recursion_limit - 1) - * - * - * the below code implements this converted to an iteration and as an - * additional minor optimization skips the recursion depth checking on the - * smaller partition as it is always less than half of the remaining data and - * will thus terminate fast enough - */ - -#define NPY_NO_DEPRECATED_API NPY_API_VERSION - -#include "npy_cpu_features.h" -#include "npy_sort.h" -#include "npysort_common.h" -#include "npysort_heapsort.h" -#include "numpy_tag.h" -#include "x86_simd_qsort.hpp" -#include "highway_qsort.hpp" - -#include -#include - -#define NOT_USED NPY_UNUSED(unused) - -/* - * pushing largest partition has upper bound of log2(n) space - * we store two pointers each time - */ -#define PYA_QS_STACK (NPY_BITSOF_INTP * 2) -#define SMALL_QUICKSORT 15 -#define SMALL_MERGESORT 20 -#define SMALL_STRING 16 - -// Disable AVX512 sorting on CYGWIN until we can figure -// out why it has test failures -template -inline bool quicksort_dispatch(T *start, npy_intp num) -{ -#if !defined(__CYGWIN__) - using TF = typename np::meta::FixedWidth::Type; - void (*dispfunc)(TF*, intptr_t) = nullptr; - if constexpr (sizeof(T) == sizeof(uint16_t)) { - #if defined(NPY_CPU_AMD64) || defined(NPY_CPU_X86) // x86 32-bit and 64-bit - #include 
"x86_simd_qsort_16bit.dispatch.h" - NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSort, ); - #else - #include "highway_qsort_16bit.dispatch.h" - NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::highway::qsort_simd::template QSort, ); - #endif - } - else if constexpr (sizeof(T) == sizeof(uint32_t) || sizeof(T) == sizeof(uint64_t)) { - #if defined(NPY_CPU_AMD64) || defined(NPY_CPU_X86) // x86 32-bit and 64-bit - #include "x86_simd_qsort.dispatch.h" - NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSort, ); - #else - #include "highway_qsort.dispatch.h" - NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::highway::qsort_simd::template QSort, ); - #endif - } - if (dispfunc) { - (*dispfunc)(reinterpret_cast(start), static_cast(num)); - return true; - } -#endif // __CYGWIN__ - (void)start; (void)num; // to avoid unused arg warn - return false; -} - -template -inline bool aquicksort_dispatch(T *start, npy_intp* arg, npy_intp num) -{ -#if !defined(__CYGWIN__) - using TF = typename np::meta::FixedWidth::Type; - void (*dispfunc)(TF*, npy_intp*, npy_intp) = nullptr; - #include "x86_simd_argsort.dispatch.h" - NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template ArgQSort, ); - if (dispfunc) { - (*dispfunc)(reinterpret_cast(start), arg, num); - return true; - } -#endif // __CYGWIN__ - (void)start; (void)arg; (void)num; // to avoid unused arg warn - return false; -} - -/* - ***************************************************************************** - ** NUMERIC SORTS ** - ***************************************************************************** - */ - -template -static int -quicksort_(type *start, npy_intp num) -{ - type vp; - type *pl = start; - type *pr = pl + num - 1; - type *stack[PYA_QS_STACK]; - type **sptr = stack; - type *pm, *pi, *pj, *pk; - int depth[PYA_QS_STACK]; - int *psdepth = depth; - int cdepth = npy_get_msb(num) * 2; - - for (;;) { - if (NPY_UNLIKELY(cdepth < 0)) { - heapsort_(pl, pr - pl + 1); - goto stack_pop; - } - while ((pr - pl) > 
SMALL_QUICKSORT) { - /* quicksort partition */ - pm = pl + ((pr - pl) >> 1); - if (Tag::less(*pm, *pl)) { - std::swap(*pm, *pl); - } - if (Tag::less(*pr, *pm)) { - std::swap(*pr, *pm); - } - if (Tag::less(*pm, *pl)) { - std::swap(*pm, *pl); - } - vp = *pm; - pi = pl; - pj = pr - 1; - std::swap(*pm, *pj); - for (;;) { - do { - ++pi; - } while (Tag::less(*pi, vp)); - do { - --pj; - } while (Tag::less(vp, *pj)); - if (pi >= pj) { - break; - } - std::swap(*pi, *pj); - } - pk = pr - 1; - std::swap(*pi, *pk); - /* push largest partition on stack */ - if (pi - pl < pr - pi) { - *sptr++ = pi + 1; - *sptr++ = pr; - pr = pi - 1; - } - else { - *sptr++ = pl; - *sptr++ = pi - 1; - pl = pi + 1; - } - *psdepth++ = --cdepth; - } - - /* insertion sort */ - for (pi = pl + 1; pi <= pr; ++pi) { - vp = *pi; - pj = pi; - pk = pi - 1; - while (pj > pl && Tag::less(vp, *pk)) { - *pj-- = *pk--; - } - *pj = vp; - } - stack_pop: - if (sptr == stack) { - break; - } - pr = *(--sptr); - pl = *(--sptr); - cdepth = *(--psdepth); - } - - return 0; -} - -template -static int -aquicksort_(type *vv, npy_intp *tosort, npy_intp num) -{ - type *v = vv; - type vp; - npy_intp *pl = tosort; - npy_intp *pr = tosort + num - 1; - npy_intp *stack[PYA_QS_STACK]; - npy_intp **sptr = stack; - npy_intp *pm, *pi, *pj, *pk, vi; - int depth[PYA_QS_STACK]; - int *psdepth = depth; - int cdepth = npy_get_msb(num) * 2; - - for (;;) { - if (NPY_UNLIKELY(cdepth < 0)) { - aheapsort_(vv, pl, pr - pl + 1); - goto stack_pop; - } - while ((pr - pl) > SMALL_QUICKSORT) { - /* quicksort partition */ - pm = pl + ((pr - pl) >> 1); - if (Tag::less(v[*pm], v[*pl])) { - std::swap(*pm, *pl); - } - if (Tag::less(v[*pr], v[*pm])) { - std::swap(*pr, *pm); - } - if (Tag::less(v[*pm], v[*pl])) { - std::swap(*pm, *pl); - } - vp = v[*pm]; - pi = pl; - pj = pr - 1; - std::swap(*pm, *pj); - for (;;) { - do { - ++pi; - } while (Tag::less(v[*pi], vp)); - do { - --pj; - } while (Tag::less(vp, v[*pj])); - if (pi >= pj) { - break; - } - 
std::swap(*pi, *pj); - } - pk = pr - 1; - std::swap(*pi, *pk); - /* push largest partition on stack */ - if (pi - pl < pr - pi) { - *sptr++ = pi + 1; - *sptr++ = pr; - pr = pi - 1; - } - else { - *sptr++ = pl; - *sptr++ = pi - 1; - pl = pi + 1; - } - *psdepth++ = --cdepth; - } - - /* insertion sort */ - for (pi = pl + 1; pi <= pr; ++pi) { - vi = *pi; - vp = v[vi]; - pj = pi; - pk = pi - 1; - while (pj > pl && Tag::less(vp, v[*pk])) { - *pj-- = *pk--; - } - *pj = vi; - } - stack_pop: - if (sptr == stack) { - break; - } - pr = *(--sptr); - pl = *(--sptr); - cdepth = *(--psdepth); - } - - return 0; -} - -/* - ***************************************************************************** - ** STRING SORTS ** - ***************************************************************************** - */ - -template -static int -string_quicksort_(type *start, npy_intp num, void *varr) -{ - PyArrayObject *arr = (PyArrayObject *)varr; - const size_t len = PyArray_ITEMSIZE(arr) / sizeof(type); - type *vp; - type *pl = start; - type *pr = pl + (num - 1) * len; - type *stack[PYA_QS_STACK], **sptr = stack, *pm, *pi, *pj, *pk; - int depth[PYA_QS_STACK]; - int *psdepth = depth; - int cdepth = npy_get_msb(num) * 2; - - /* Items that have zero size don't make sense to sort */ - if (len == 0) { - return 0; - } - - vp = (type *)malloc(PyArray_ITEMSIZE(arr)); - if (vp == NULL) { - return -NPY_ENOMEM; - } - - for (;;) { - if (NPY_UNLIKELY(cdepth < 0)) { - string_heapsort_(pl, (pr - pl) / len + 1, varr); - goto stack_pop; - } - while ((size_t)(pr - pl) > SMALL_QUICKSORT * len) { - /* quicksort partition */ - pm = pl + (((pr - pl) / len) >> 1) * len; - if (Tag::less(pm, pl, len)) { - Tag::swap(pm, pl, len); - } - if (Tag::less(pr, pm, len)) { - Tag::swap(pr, pm, len); - } - if (Tag::less(pm, pl, len)) { - Tag::swap(pm, pl, len); - } - Tag::copy(vp, pm, len); - pi = pl; - pj = pr - len; - Tag::swap(pm, pj, len); - for (;;) { - do { - pi += len; - } while (Tag::less(pi, vp, len)); - do { - pj -= len; 
- } while (Tag::less(vp, pj, len)); - if (pi >= pj) { - break; - } - Tag::swap(pi, pj, len); - } - pk = pr - len; - Tag::swap(pi, pk, len); - /* push largest partition on stack */ - if (pi - pl < pr - pi) { - *sptr++ = pi + len; - *sptr++ = pr; - pr = pi - len; - } - else { - *sptr++ = pl; - *sptr++ = pi - len; - pl = pi + len; - } - *psdepth++ = --cdepth; - } - - /* insertion sort */ - for (pi = pl + len; pi <= pr; pi += len) { - Tag::copy(vp, pi, len); - pj = pi; - pk = pi - len; - while (pj > pl && Tag::less(vp, pk, len)) { - Tag::copy(pj, pk, len); - pj -= len; - pk -= len; - } - Tag::copy(pj, vp, len); - } - stack_pop: - if (sptr == stack) { - break; - } - pr = *(--sptr); - pl = *(--sptr); - cdepth = *(--psdepth); - } - - free(vp); - return 0; -} - -template -static int -string_aquicksort_(type *vv, npy_intp *tosort, npy_intp num, void *varr) -{ - type *v = vv; - PyArrayObject *arr = (PyArrayObject *)varr; - size_t len = PyArray_ITEMSIZE(arr) / sizeof(type); - type *vp; - npy_intp *pl = tosort; - npy_intp *pr = tosort + num - 1; - npy_intp *stack[PYA_QS_STACK]; - npy_intp **sptr = stack; - npy_intp *pm, *pi, *pj, *pk, vi; - int depth[PYA_QS_STACK]; - int *psdepth = depth; - int cdepth = npy_get_msb(num) * 2; - - /* Items that have zero size don't make sense to sort */ - if (len == 0) { - return 0; - } - - for (;;) { - if (NPY_UNLIKELY(cdepth < 0)) { - string_aheapsort_(vv, pl, pr - pl + 1, varr); - goto stack_pop; - } - while ((pr - pl) > SMALL_QUICKSORT) { - /* quicksort partition */ - pm = pl + ((pr - pl) >> 1); - if (Tag::less(v + (*pm) * len, v + (*pl) * len, len)) { - std::swap(*pm, *pl); - } - if (Tag::less(v + (*pr) * len, v + (*pm) * len, len)) { - std::swap(*pr, *pm); - } - if (Tag::less(v + (*pm) * len, v + (*pl) * len, len)) { - std::swap(*pm, *pl); - } - vp = v + (*pm) * len; - pi = pl; - pj = pr - 1; - std::swap(*pm, *pj); - for (;;) { - do { - ++pi; - } while (Tag::less(v + (*pi) * len, vp, len)); - do { - --pj; - } while (Tag::less(vp, v + (*pj) 
* len, len)); - if (pi >= pj) { - break; - } - std::swap(*pi, *pj); - } - pk = pr - 1; - std::swap(*pi, *pk); - /* push largest partition on stack */ - if (pi - pl < pr - pi) { - *sptr++ = pi + 1; - *sptr++ = pr; - pr = pi - 1; - } - else { - *sptr++ = pl; - *sptr++ = pi - 1; - pl = pi + 1; - } - *psdepth++ = --cdepth; - } - - /* insertion sort */ - for (pi = pl + 1; pi <= pr; ++pi) { - vi = *pi; - vp = v + vi * len; - pj = pi; - pk = pi - 1; - while (pj > pl && Tag::less(vp, v + (*pk) * len, len)) { - *pj-- = *pk--; - } - *pj = vi; - } - stack_pop: - if (sptr == stack) { - break; - } - pr = *(--sptr); - pl = *(--sptr); - cdepth = *(--psdepth); - } - - return 0; -} - -/* - ***************************************************************************** - ** GENERIC SORT ** - ***************************************************************************** - */ - -NPY_NO_EXPORT int -npy_quicksort(void *start, npy_intp num, void *varr) -{ - npy_intp elsize; - PyArray_CompareFunc *cmp; - get_sort_data_from_array(varr, &elsize, &cmp); - - return npy_quicksort_impl(start, num, varr, elsize, cmp); -} - -NPY_NO_EXPORT int -npy_quicksort_impl(void *start, npy_intp num, void *varr, - npy_intp elsize, PyArray_CompareFunc *cmp) -{ - void *arr = varr; - char *vp; - char *pl = (char *)start; - char *pr = pl + (num - 1) * elsize; - char *stack[PYA_QS_STACK]; - char **sptr = stack; - char *pm, *pi, *pj, *pk; - int depth[PYA_QS_STACK]; - int *psdepth = depth; - int cdepth = npy_get_msb(num) * 2; - - /* Items that have zero size don't make sense to sort */ - if (elsize == 0) { - return 0; - } - - vp = (char *)malloc(elsize); - if (vp == NULL) { - return -NPY_ENOMEM; - } - - for (;;) { - if (NPY_UNLIKELY(cdepth < 0)) { - npy_heapsort(pl, (pr - pl) / elsize + 1, varr); - goto stack_pop; - } - while (pr - pl > SMALL_QUICKSORT * elsize) { - /* quicksort partition */ - pm = pl + (((pr - pl) / elsize) >> 1) * elsize; - if (cmp(pm, pl, arr) < 0) { - GENERIC_SWAP(pm, pl, elsize); - } - if 
(cmp(pr, pm, arr) < 0) { - GENERIC_SWAP(pr, pm, elsize); - } - if (cmp(pm, pl, arr) < 0) { - GENERIC_SWAP(pm, pl, elsize); - } - GENERIC_COPY(vp, pm, elsize); - pi = pl; - pj = pr - elsize; - GENERIC_SWAP(pm, pj, elsize); - /* - * Generic comparisons may be buggy, so don't rely on the sentinels - * to keep the pointers from going out of bounds. - */ - for (;;) { - do { - pi += elsize; - } while (cmp(pi, vp, arr) < 0 && pi < pj); - do { - pj -= elsize; - } while (cmp(vp, pj, arr) < 0 && pi < pj); - if (pi >= pj) { - break; - } - GENERIC_SWAP(pi, pj, elsize); - } - pk = pr - elsize; - GENERIC_SWAP(pi, pk, elsize); - /* push largest partition on stack */ - if (pi - pl < pr - pi) { - *sptr++ = pi + elsize; - *sptr++ = pr; - pr = pi - elsize; - } - else { - *sptr++ = pl; - *sptr++ = pi - elsize; - pl = pi + elsize; - } - *psdepth++ = --cdepth; - } - - /* insertion sort */ - for (pi = pl + elsize; pi <= pr; pi += elsize) { - GENERIC_COPY(vp, pi, elsize); - pj = pi; - pk = pi - elsize; - while (pj > pl && cmp(vp, pk, arr) < 0) { - GENERIC_COPY(pj, pk, elsize); - pj -= elsize; - pk -= elsize; - } - GENERIC_COPY(pj, vp, elsize); - } - stack_pop: - if (sptr == stack) { - break; - } - pr = *(--sptr); - pl = *(--sptr); - cdepth = *(--psdepth); - } - - free(vp); - return 0; -} - -NPY_NO_EXPORT int -npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) -{ - npy_intp elsize; - PyArray_CompareFunc *cmp; - get_sort_data_from_array(varr, &elsize, &cmp); - - return npy_aquicksort_impl(vv, tosort, num, varr, elsize, cmp); -} - -NPY_NO_EXPORT int -npy_aquicksort_impl(void *vv, npy_intp *tosort, npy_intp num, void *varr, - npy_intp elsize, PyArray_CompareFunc *cmp) -{ - void *arr = varr; - char *v = (char *)vv; - char *vp; - npy_intp *pl = tosort; - npy_intp *pr = tosort + num - 1; - npy_intp *stack[PYA_QS_STACK]; - npy_intp **sptr = stack; - npy_intp *pm, *pi, *pj, *pk, vi; - int depth[PYA_QS_STACK]; - int *psdepth = depth; - int cdepth = npy_get_msb(num) * 2; - - /* 
Items that have zero size don't make sense to sort */ - if (elsize == 0) { - return 0; - } - - for (;;) { - if (NPY_UNLIKELY(cdepth < 0)) { - npy_aheapsort(vv, pl, pr - pl + 1, varr); - goto stack_pop; - } - while ((pr - pl) > SMALL_QUICKSORT) { - /* quicksort partition */ - pm = pl + ((pr - pl) >> 1); - if (cmp(v + (*pm) * elsize, v + (*pl) * elsize, arr) < 0) { - INTP_SWAP(*pm, *pl); - } - if (cmp(v + (*pr) * elsize, v + (*pm) * elsize, arr) < 0) { - INTP_SWAP(*pr, *pm); - } - if (cmp(v + (*pm) * elsize, v + (*pl) * elsize, arr) < 0) { - INTP_SWAP(*pm, *pl); - } - vp = v + (*pm) * elsize; - pi = pl; - pj = pr - 1; - INTP_SWAP(*pm, *pj); - for (;;) { - do { - ++pi; - } while (cmp(v + (*pi) * elsize, vp, arr) < 0 && pi < pj); - do { - --pj; - } while (cmp(vp, v + (*pj) * elsize, arr) < 0 && pi < pj); - if (pi >= pj) { - break; - } - INTP_SWAP(*pi, *pj); - } - pk = pr - 1; - INTP_SWAP(*pi, *pk); - /* push largest partition on stack */ - if (pi - pl < pr - pi) { - *sptr++ = pi + 1; - *sptr++ = pr; - pr = pi - 1; - } - else { - *sptr++ = pl; - *sptr++ = pi - 1; - pl = pi + 1; - } - *psdepth++ = --cdepth; - } - - /* insertion sort */ - for (pi = pl + 1; pi <= pr; ++pi) { - vi = *pi; - vp = v + vi * elsize; - pj = pi; - pk = pi - 1; - while (pj > pl && cmp(vp, v + (*pk) * elsize, arr) < 0) { - *pj-- = *pk--; - } - *pj = vi; - } - stack_pop: - if (sptr == stack) { - break; - } - pr = *(--sptr); - pl = *(--sptr); - cdepth = *(--psdepth); - } - - return 0; -} - -/*************************************** - * C > C++ dispatch - ***************************************/ - -NPY_NO_EXPORT int -quicksort_bool(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return quicksort_((npy_bool *)start, n); -} -NPY_NO_EXPORT int -quicksort_byte(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return quicksort_((npy_byte *)start, n); -} -NPY_NO_EXPORT int -quicksort_ubyte(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return quicksort_((npy_ubyte *)start, n); -} 
-NPY_NO_EXPORT int -quicksort_short(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - if (quicksort_dispatch((npy_short *)start, n)) { - return 0; - } - return quicksort_((npy_short *)start, n); -} -NPY_NO_EXPORT int -quicksort_ushort(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - if (quicksort_dispatch((npy_ushort *)start, n)) { - return 0; - } - return quicksort_((npy_ushort *)start, n); -} -NPY_NO_EXPORT int -quicksort_int(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - if (quicksort_dispatch((npy_int *)start, n)) { - return 0; - } - return quicksort_((npy_int *)start, n); -} -NPY_NO_EXPORT int -quicksort_uint(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - if (quicksort_dispatch((npy_uint *)start, n)) { - return 0; - } - return quicksort_((npy_uint *)start, n); -} -NPY_NO_EXPORT int -quicksort_long(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - if (quicksort_dispatch((npy_long *)start, n)) { - return 0; - } - return quicksort_((npy_long *)start, n); -} -NPY_NO_EXPORT int -quicksort_ulong(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - if (quicksort_dispatch((npy_ulong *)start, n)) { - return 0; - } - return quicksort_((npy_ulong *)start, n); -} -NPY_NO_EXPORT int -quicksort_longlong(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - if (quicksort_dispatch((npy_longlong *)start, n)) { - return 0; - } - return quicksort_((npy_longlong *)start, n); -} -NPY_NO_EXPORT int -quicksort_ulonglong(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - if (quicksort_dispatch((npy_ulonglong *)start, n)) { - return 0; - } - return quicksort_((npy_ulonglong *)start, n); -} -NPY_NO_EXPORT int -quicksort_half(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - if (quicksort_dispatch((np::Half *)start, n)) { - return 0; - } - return quicksort_((npy_half *)start, n); -} -NPY_NO_EXPORT int -quicksort_float(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - if (quicksort_dispatch((npy_float *)start, n)) { - return 0; - } - return 
quicksort_((npy_float *)start, n); -} -NPY_NO_EXPORT int -quicksort_double(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - if (quicksort_dispatch((npy_double *)start, n)) { - return 0; - } - return quicksort_((npy_double *)start, n); -} -NPY_NO_EXPORT int -quicksort_longdouble(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return quicksort_((npy_longdouble *)start, n); -} -NPY_NO_EXPORT int -quicksort_cfloat(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return quicksort_((npy_cfloat *)start, n); -} -NPY_NO_EXPORT int -quicksort_cdouble(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return quicksort_((npy_cdouble *)start, n); -} -NPY_NO_EXPORT int -quicksort_clongdouble(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return quicksort_((npy_clongdouble *)start, n); -} -NPY_NO_EXPORT int -quicksort_datetime(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return quicksort_((npy_datetime *)start, n); -} -NPY_NO_EXPORT int -quicksort_timedelta(void *start, npy_intp n, void *NPY_UNUSED(varr)) -{ - return quicksort_((npy_timedelta *)start, n); -} - -NPY_NO_EXPORT int -aquicksort_bool(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - return aquicksort_((npy_bool *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_byte(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - return aquicksort_((npy_byte *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_ubyte(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aquicksort_((npy_ubyte *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_short(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aquicksort_((npy_short *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_ushort(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aquicksort_((npy_ushort *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_int(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - if 
(aquicksort_dispatch((npy_int *)vv, tosort, n)) { - return 0; - } - return aquicksort_((npy_int *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_uint(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - if (aquicksort_dispatch((npy_uint *)vv, tosort, n)) { - return 0; - } - return aquicksort_((npy_uint *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_long(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - if (aquicksort_dispatch((npy_long *)vv, tosort, n)) { - return 0; - } - return aquicksort_((npy_long *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_ulong(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - if (aquicksort_dispatch((npy_ulong *)vv, tosort, n)) { - return 0; - } - return aquicksort_((npy_ulong *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_longlong(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - if (aquicksort_dispatch((npy_longlong *)vv, tosort, n)) { - return 0; - } - return aquicksort_((npy_longlong *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_ulonglong(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - if (aquicksort_dispatch((npy_ulonglong *)vv, tosort, n)) { - return 0; - } - return aquicksort_((npy_ulonglong *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_half(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) -{ - return aquicksort_((npy_half *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_float(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - if (aquicksort_dispatch((npy_float *)vv, tosort, n)) { - return 0; - } - return aquicksort_((npy_float *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_double(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - if (aquicksort_dispatch((npy_double *)vv, tosort, n)) { - return 0; - } - return aquicksort_((npy_double *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_longdouble(void *vv, npy_intp *tosort, npy_intp n, - void 
*NPY_UNUSED(varr)) -{ - return aquicksort_((npy_longdouble *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_cfloat(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aquicksort_((npy_cfloat *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_cdouble(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aquicksort_((npy_cdouble *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_clongdouble(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aquicksort_((npy_clongdouble *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_datetime(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aquicksort_((npy_datetime *)vv, tosort, n); -} -NPY_NO_EXPORT int -aquicksort_timedelta(void *vv, npy_intp *tosort, npy_intp n, - void *NPY_UNUSED(varr)) -{ - return aquicksort_((npy_timedelta *)vv, tosort, n); -} - -NPY_NO_EXPORT int -quicksort_string(void *start, npy_intp n, void *varr) -{ - return string_quicksort_((npy_char *)start, n, varr); -} -NPY_NO_EXPORT int -quicksort_unicode(void *start, npy_intp n, void *varr) -{ - return string_quicksort_((npy_ucs4 *)start, n, varr); -} - -NPY_NO_EXPORT int -aquicksort_string(void *vv, npy_intp *tosort, npy_intp n, void *varr) -{ - return string_aquicksort_((npy_char *)vv, tosort, n, - varr); -} -NPY_NO_EXPORT int -aquicksort_unicode(void *vv, npy_intp *tosort, npy_intp n, void *varr) -{ - return string_aquicksort_((npy_ucs4 *)vv, tosort, n, - varr); -} diff --git a/numpy/_core/src/npysort/quicksort.hpp b/numpy/_core/src/npysort/quicksort.hpp index c8c821c06c05..e3cbcd155e4d 100644 --- a/numpy/_core/src/npysort/quicksort.hpp +++ b/numpy/_core/src/npysort/quicksort.hpp @@ -1,44 +1,139 @@ -#ifndef NUMPY_SRC_COMMON_NPYSORT_QUICKSORT_HPP -#define NUMPY_SRC_COMMON_NPYSORT_QUICKSORT_HPP +#define NPY_NO_DEPRECATED_API NPY_API_VERSION -#include "heapsort.hpp" -#include "common.hpp" +#include "npy_cpu_features.h" +#include "npy_sort.h" +#include 
"npysort_common.h" +#include "npysort_heapsort.h" +#include "numpy_tag.h" +#include "x86_simd_qsort.hpp" +#include "highway_qsort.hpp" -namespace np::sort { +#include +#include -// pushing largest partition has upper bound of log2(n) space -// we store two pointers each time -constexpr size_t kQuickStack = sizeof(intptr_t) * 8 * 2; -constexpr ptrdiff_t kQuickSmall = 15; +#define NOT_USED NPY_UNUSED(unused) -// NUMERIC SORTS -template -inline void Quick(T *start, SSize num) +/* + * pushing largest partition has upper bound of log2(n) space + * we store two pointers each time + */ +#define PYA_QS_STACK (NPY_BITSOF_INTP * 2) +#define SMALL_QUICKSORT 15 +#define SMALL_MERGESORT 20 +#define SMALL_STRING 16 + +// Disable AVX512 sorting on CYGWIN until we can figure +// out why it has test failures +template +inline bool quicksort_dispatch(T *start, npy_intp num) { - T vp; - T *pl = start; - T *pr = pl + num - 1; - T *stack[kQuickStack]; - T **sptr = stack; - T *pm, *pi, *pj, *pk; - int depth[kQuickStack]; +#if !defined(__CYGWIN__) + if constexpr ( + (std::is_base_of_v + && !std::is_same_v) || + std::is_base_of_v || + std::is_same_v + ) { + using TF = typename np::meta::FixedWidth::Type; + void (*dispfunc)(TF*, intptr_t, bool) = nullptr; + if constexpr (sizeof(T) == sizeof(uint16_t)) { + #if defined(NPY_CPU_AMD64) || defined(NPY_CPU_X86) // x86 32-bit and 64-bit + if constexpr (!reverse) { // x86 SIMD sort is ascending-only + #include "x86_simd_qsort_16bit.dispatch.h" + NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSort, ); + } + #else + #include "highway_qsort_16bit.dispatch.h" + NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::highway::qsort_simd::template QSort, ); + #endif + } + else if constexpr (sizeof(T) == sizeof(uint32_t) || sizeof(T) == sizeof(uint64_t)) { + #if defined(NPY_CPU_AMD64) || defined(NPY_CPU_X86) // x86 32-bit and 64-bit + if constexpr (!reverse) { // x86 SIMD sort is ascending-only + #include "x86_simd_qsort.dispatch.h" + 
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSort, ); + } + #else + #include "highway_qsort.dispatch.h" + NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::highway::qsort_simd::template QSort, ); + #endif + } + if (dispfunc) { + (*dispfunc)(reinterpret_cast(start), static_cast(num), reverse); + return true; + } + } +#endif // __CYGWIN__ + (void)start; (void)num; // to avoid unused arg warn + return false; +} + +template +inline bool aquicksort_dispatch(T *start, npy_intp* arg, npy_intp num) +{ +#if !defined(__CYGWIN__) + if constexpr ( + ((std::is_base_of_v + && !std::is_same_v) || + std::is_base_of_v) + && !reverse // x86 SIMD argsort is ascending-only + ) { + using TF = typename np::meta::FixedWidth::Type; + void (*dispfunc)(TF*, npy_intp*, npy_intp, bool) = nullptr; + if constexpr (sizeof(T) == sizeof(uint32_t) || sizeof(T) == sizeof(uint64_t)) { + #include "x86_simd_argsort.dispatch.h" + NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template ArgQSort, ); + } + if (dispfunc) { + (*dispfunc)(reinterpret_cast(start), arg, num, reverse); + return true; + } + } +#endif // __CYGWIN__ + (void)start; (void)arg; (void)num; // to avoid unused arg warn + return false; +} + +/* + ***************************************************************************** + ** NUMERIC SORTS ** + ***************************************************************************** + */ + +template +static int +quicksort_(type *start, npy_intp num) +{ + using T = typename std::conditional, np::Half, typename Tag::type>::type; + if (quicksort_dispatch((T *)start, num)) { + return 0; + } + + type vp; + type *pl = start; + type *pr = pl + num - 1; + type *stack[PYA_QS_STACK]; + type **sptr = stack; + type *pm, *pi, *pj, *pk; + int depth[PYA_QS_STACK]; int *psdepth = depth; - int cdepth = BitScanReverse(static_cast>(num)) * 2; + int cdepth = npy_get_msb(num) * 2; + for (;;) { if (NPY_UNLIKELY(cdepth < 0)) { - Heap(pl, pr - pl + 1); + heapsort_(pl, pr - pl + 1); goto stack_pop; } - while 
((pr - pl) > kQuickSmall) { - // quicksort partition + while ((pr - pl) > SMALL_QUICKSORT) { + /* quicksort partition */ pm = pl + ((pr - pl) >> 1); - if (LessThan(*pm, *pl)) { + if (npy::cmp(*pm, *pl)) { std::swap(*pm, *pl); } - if (LessThan(*pr, *pm)) { + if (npy::cmp(*pr, *pm)) { std::swap(*pr, *pm); } - if (LessThan(*pm, *pl)) { + if (npy::cmp(*pm, *pl)) { std::swap(*pm, *pl); } vp = *pm; @@ -48,10 +143,10 @@ inline void Quick(T *start, SSize num) for (;;) { do { ++pi; - } while (LessThan(*pi, vp)); + } while (npy::cmp(*pi, vp)); do { --pj; - } while (LessThan(vp, *pj)); + } while (npy::cmp(vp, *pj)); if (pi >= pj) { break; } @@ -59,7 +154,7 @@ inline void Quick(T *start, SSize num) } pk = pr - 1; std::swap(*pi, *pk); - // push largest partition on stack + /* push largest partition on stack */ if (pi - pl < pr - pi) { *sptr++ = pi + 1; *sptr++ = pr; @@ -78,7 +173,7 @@ inline void Quick(T *start, SSize num) vp = *pi; pj = pi; pk = pi - 1; - while (pj > pl && LessThan(vp, *pk)) { + while (pj > pl && npy::cmp(vp, *pk)) { *pj-- = *pk--; } *pj = vp; @@ -91,6 +186,310 @@ inline void Quick(T *start, SSize num) pl = *(--sptr); cdepth = *(--psdepth); } + + return 0; +} + +// ``PyArray_SortFunc``-shaped trampoline. 
+template +static int +quicksort_impl(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return quicksort_((type *)start, num); } -} // np::sort -#endif // NUMPY_SRC_COMMON_NPYSORT_QUICK_HPP + +template +static int +aquicksort_(type *vv, npy_intp *tosort, npy_intp num) +{ + if (aquicksort_dispatch((type *)vv, tosort, num)) { + return 0; + } + + type *v = vv; + type vp; + npy_intp *pl = tosort; + npy_intp *pr = tosort + num - 1; + npy_intp *stack[PYA_QS_STACK]; + npy_intp **sptr = stack; + npy_intp *pm, *pi, *pj, *pk, vi; + int depth[PYA_QS_STACK]; + int *psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + aheapsort_(vv, pl, pr - pl + 1); + goto stack_pop; + } + while ((pr - pl) > SMALL_QUICKSORT) { + /* quicksort partition */ + pm = pl + ((pr - pl) >> 1); + if (npy::cmp(v[*pm], v[*pl])) { + std::swap(*pm, *pl); + } + if (npy::cmp(v[*pr], v[*pm])) { + std::swap(*pr, *pm); + } + if (npy::cmp(v[*pm], v[*pl])) { + std::swap(*pm, *pl); + } + vp = v[*pm]; + pi = pl; + pj = pr - 1; + std::swap(*pm, *pj); + for (;;) { + do { + ++pi; + } while (npy::cmp(v[*pi], vp)); + do { + --pj; + } while (npy::cmp(vp, v[*pj])); + if (pi >= pj) { + break; + } + std::swap(*pi, *pj); + } + pk = pr - 1; + std::swap(*pi, *pk); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + 1; + *sptr++ = pr; + pr = pi - 1; + } + else { + *sptr++ = pl; + *sptr++ = pi - 1; + pl = pi + 1; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + 1; pi <= pr; ++pi) { + vi = *pi; + vp = v[vi]; + pj = pi; + pk = pi - 1; + while (pj > pl && npy::cmp(vp, v[*pk])) { + *pj-- = *pk--; + } + *pj = vi; + } + stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + return 0; +} + +// ``PyArray_ArgSortFunc``-shaped trampoline. 
+template +static int +aquicksort_impl(void *vv, npy_intp *tosort, npy_intp num, + void *NPY_UNUSED(varr)) +{ + return aquicksort_((type *)vv, tosort, num); +} + +/* + ***************************************************************************** + ** STRING SORTS ** + ***************************************************************************** + */ + +template +static int +string_quicksort_(type *start, npy_intp num, int elsize) +{ + const size_t len = elsize / sizeof(type); + type *vp; + type *pl = start; + type *pr = pl + (num - 1) * len; + type *stack[PYA_QS_STACK], **sptr = stack, *pm, *pi, *pj, *pk; + int depth[PYA_QS_STACK]; + int *psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + /* Items that have zero size don't make sense to sort */ + if (len == 0) { + return 0; + } + + vp = (type *)malloc(elsize); + if (vp == NULL) { + return -NPY_ENOMEM; + } + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + string_heapsort_(pl, (pr - pl) / len + 1, elsize); + goto stack_pop; + } + while ((size_t)(pr - pl) > SMALL_QUICKSORT * len) { + /* quicksort partition */ + pm = pl + (((pr - pl) / len) >> 1) * len; + if (npy::cmp(pm, pl, len)) { + Tag::swap(pm, pl, len); + } + if (npy::cmp(pr, pm, len)) { + Tag::swap(pr, pm, len); + } + if (npy::cmp(pm, pl, len)) { + Tag::swap(pm, pl, len); + } + Tag::copy(vp, pm, len); + pi = pl; + pj = pr - len; + Tag::swap(pm, pj, len); + for (;;) { + do { + pi += len; + } while (npy::cmp(pi, vp, len)); + do { + pj -= len; + } while (npy::cmp(vp, pj, len)); + if (pi >= pj) { + break; + } + Tag::swap(pi, pj, len); + } + pk = pr - len; + Tag::swap(pi, pk, len); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + len; + *sptr++ = pr; + pr = pi - len; + } + else { + *sptr++ = pl; + *sptr++ = pi - len; + pl = pi + len; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + len; pi <= pr; pi += len) { + Tag::copy(vp, pi, len); + pj = pi; + pk = pi - len; + while (pj > pl && npy::cmp(vp, pk, 
len)) { + Tag::copy(pj, pk, len); + pj -= len; + pk -= len; + } + Tag::copy(pj, vp, len); + } + stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + free(vp); + return 0; +} + +template +static int +string_aquicksort_(type *vv, npy_intp *tosort, npy_intp num, int elsize) +{ + type *v = vv; + size_t len = elsize / sizeof(type); + type *vp; + npy_intp *pl = tosort; + npy_intp *pr = tosort + num - 1; + npy_intp *stack[PYA_QS_STACK]; + npy_intp **sptr = stack; + npy_intp *pm, *pi, *pj, *pk, vi; + int depth[PYA_QS_STACK]; + int *psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + /* Items that have zero size don't make sense to sort */ + if (len == 0) { + return 0; + } + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + string_aheapsort_(vv, pl, pr - pl + 1, elsize); + goto stack_pop; + } + while ((pr - pl) > SMALL_QUICKSORT) { + /* quicksort partition */ + pm = pl + ((pr - pl) >> 1); + if (npy::cmp(v + (*pm) * len, v + (*pl) * len, len)) { + std::swap(*pm, *pl); + } + if (npy::cmp(v + (*pr) * len, v + (*pm) * len, len)) { + std::swap(*pr, *pm); + } + if (npy::cmp(v + (*pm) * len, v + (*pl) * len, len)) { + std::swap(*pm, *pl); + } + vp = v + (*pm) * len; + pi = pl; + pj = pr - 1; + std::swap(*pm, *pj); + for (;;) { + do { + ++pi; + } while (npy::cmp(v + (*pi) * len, vp, len)); + do { + --pj; + } while (npy::cmp(vp, v + (*pj) * len, len)); + if (pi >= pj) { + break; + } + std::swap(*pi, *pj); + } + pk = pr - 1; + std::swap(*pi, *pk); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + 1; + *sptr++ = pr; + pr = pi - 1; + } + else { + *sptr++ = pl; + *sptr++ = pi - 1; + pl = pi + 1; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + 1; pi <= pr; ++pi) { + vi = *pi; + vp = v + vi * len; + pj = pi; + pk = pi - 1; + while (pj > pl && npy::cmp(vp, v + (*pk) * len, len)) { + *pj-- = *pk--; + } + *pj = vi; + } + stack_pop: + if (sptr == stack) { + break; + 
} + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + return 0; +} \ No newline at end of file diff --git a/numpy/_core/src/npysort/quicksort_generic.cpp b/numpy/_core/src/npysort/quicksort_generic.cpp new file mode 100644 index 000000000000..b76efab9d0d7 --- /dev/null +++ b/numpy/_core/src/npysort/quicksort_generic.cpp @@ -0,0 +1,294 @@ +/* -*- c -*- */ + +/* + * The purpose of this module is to add faster sort functions + * that are type-specific. This is done by altering the + * function table for the builtin descriptors. + * + * These sorting functions are copied almost directly from numarray + * with a few modifications (complex comparisons compare the imaginary + * part if the real parts are equal, for example), and the names + * are changed. + * + * The original sorting code is due to Charles R. Harris who wrote + * it for numarray. + */ + +/* + * Quick sort is usually the fastest, but the worst case scenario is O(N^2) so + * the code switches to the O(NlogN) worst case heapsort if not enough progress + * is made on the large side of the two quicksort partitions. This improves the + * worst case while still retaining the speed of quicksort for the common case. + * This is variant known as introsort. 
+ * + * + * def introsort(lower, higher, recursion_limit=log2(higher - lower + 1) * 2): + * # sort remainder with heapsort if we are not making enough progress + * # we arbitrarily choose 2 * log(n) as the cutoff point + * if recursion_limit < 0: + * heapsort(lower, higher) + * return + * + * if lower < higher: + * pivot_pos = partition(lower, higher) + * # recurse into smaller first and leave larger on stack + * # this limits the required stack space + * if (pivot_pos - lower > higher - pivot_pos): + * quicksort(pivot_pos + 1, higher, recursion_limit - 1) + * quicksort(lower, pivot_pos, recursion_limit - 1) + * else: + * quicksort(lower, pivot_pos, recursion_limit - 1) + * quicksort(pivot_pos + 1, higher, recursion_limit - 1) + * + * + * the below code implements this converted to an iteration and as an + * additional minor optimization skips the recursion depth checking on the + * smaller partition as it is always less than half of the remaining data and + * will thus terminate fast enough + */ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "npy_cpu_features.h" +#include "npy_sort.h" +#include "npysort_common.h" +#include "npysort_heapsort.h" +#include "numpy_tag.h" +#include "x86_simd_qsort.hpp" +#include "highway_qsort.hpp" + +#include +#include + +#define NOT_USED NPY_UNUSED(unused) + +/* + * pushing largest partition has upper bound of log2(n) space + * we store two pointers each time + */ +#define PYA_QS_STACK (NPY_BITSOF_INTP * 2) +#define SMALL_QUICKSORT 15 +#define SMALL_MERGESORT 20 +#define SMALL_STRING 16 + +/* + ***************************************************************************** + ** GENERIC SORT ** + ***************************************************************************** + */ + +NPY_NO_EXPORT int +npy_quicksort(void *start, npy_intp num, void *varr) +{ + npy_intp elsize; + PyArray_CompareFunc *cmp; + get_sort_data_from_array(varr, &elsize, &cmp); + + return npy_quicksort_impl(start, num, varr, elsize, cmp); +} + 
+NPY_NO_EXPORT int +npy_quicksort_impl(void *start, npy_intp num, void *varr, + npy_intp elsize, PyArray_CompareFunc *cmp) +{ + void *arr = varr; + char *vp; + char *pl = (char *)start; + char *pr = pl + (num - 1) * elsize; + char *stack[PYA_QS_STACK]; + char **sptr = stack; + char *pm, *pi, *pj, *pk; + int depth[PYA_QS_STACK]; + int *psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + /* Items that have zero size don't make sense to sort */ + if (elsize == 0) { + return 0; + } + + vp = (char *)malloc(elsize); + if (vp == NULL) { + return -NPY_ENOMEM; + } + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + npy_heapsort(pl, (pr - pl) / elsize + 1, varr); + goto stack_pop; + } + while (pr - pl > SMALL_QUICKSORT * elsize) { + /* quicksort partition */ + pm = pl + (((pr - pl) / elsize) >> 1) * elsize; + if (cmp(pm, pl, arr) < 0) { + GENERIC_SWAP(pm, pl, elsize); + } + if (cmp(pr, pm, arr) < 0) { + GENERIC_SWAP(pr, pm, elsize); + } + if (cmp(pm, pl, arr) < 0) { + GENERIC_SWAP(pm, pl, elsize); + } + GENERIC_COPY(vp, pm, elsize); + pi = pl; + pj = pr - elsize; + GENERIC_SWAP(pm, pj, elsize); + /* + * Generic comparisons may be buggy, so don't rely on the sentinels + * to keep the pointers from going out of bounds. 
+ */ + for (;;) { + do { + pi += elsize; + } while (cmp(pi, vp, arr) < 0 && pi < pj); + do { + pj -= elsize; + } while (cmp(vp, pj, arr) < 0 && pi < pj); + if (pi >= pj) { + break; + } + GENERIC_SWAP(pi, pj, elsize); + } + pk = pr - elsize; + GENERIC_SWAP(pi, pk, elsize); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + elsize; + *sptr++ = pr; + pr = pi - elsize; + } + else { + *sptr++ = pl; + *sptr++ = pi - elsize; + pl = pi + elsize; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + elsize; pi <= pr; pi += elsize) { + GENERIC_COPY(vp, pi, elsize); + pj = pi; + pk = pi - elsize; + while (pj > pl && cmp(vp, pk, arr) < 0) { + GENERIC_COPY(pj, pk, elsize); + pj -= elsize; + pk -= elsize; + } + GENERIC_COPY(pj, vp, elsize); + } + stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + free(vp); + return 0; +} + +NPY_NO_EXPORT int +npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) +{ + npy_intp elsize; + PyArray_CompareFunc *cmp; + get_sort_data_from_array(varr, &elsize, &cmp); + + return npy_aquicksort_impl(vv, tosort, num, varr, elsize, cmp); +} + +NPY_NO_EXPORT int +npy_aquicksort_impl(void *vv, npy_intp *tosort, npy_intp num, void *varr, + npy_intp elsize, PyArray_CompareFunc *cmp) +{ + void *arr = varr; + char *v = (char *)vv; + char *vp; + npy_intp *pl = tosort; + npy_intp *pr = tosort + num - 1; + npy_intp *stack[PYA_QS_STACK]; + npy_intp **sptr = stack; + npy_intp *pm, *pi, *pj, *pk, vi; + int depth[PYA_QS_STACK]; + int *psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + /* Items that have zero size don't make sense to sort */ + if (elsize == 0) { + return 0; + } + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + npy_aheapsort(vv, pl, pr - pl + 1, varr); + goto stack_pop; + } + while ((pr - pl) > SMALL_QUICKSORT) { + /* quicksort partition */ + pm = pl + ((pr - pl) >> 1); + if (cmp(v + (*pm) * elsize, v + (*pl) * 
elsize, arr) < 0) { + INTP_SWAP(*pm, *pl); + } + if (cmp(v + (*pr) * elsize, v + (*pm) * elsize, arr) < 0) { + INTP_SWAP(*pr, *pm); + } + if (cmp(v + (*pm) * elsize, v + (*pl) * elsize, arr) < 0) { + INTP_SWAP(*pm, *pl); + } + vp = v + (*pm) * elsize; + pi = pl; + pj = pr - 1; + INTP_SWAP(*pm, *pj); + for (;;) { + do { + ++pi; + } while (cmp(v + (*pi) * elsize, vp, arr) < 0 && pi < pj); + do { + --pj; + } while (cmp(vp, v + (*pj) * elsize, arr) < 0 && pi < pj); + if (pi >= pj) { + break; + } + INTP_SWAP(*pi, *pj); + } + pk = pr - 1; + INTP_SWAP(*pi, *pk); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + 1; + *sptr++ = pr; + pr = pi - 1; + } + else { + *sptr++ = pl; + *sptr++ = pi - 1; + pl = pi + 1; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + 1; pi <= pr; ++pi) { + vi = *pi; + vp = v + vi * elsize; + pj = pi; + pk = pi - 1; + while (pj > pl && cmp(vp, v + (*pk) * elsize, arr) < 0) { + *pj-- = *pk--; + } + *pj = vi; + } + stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + return 0; +} diff --git a/numpy/_core/src/npysort/quicksort_generic.hpp b/numpy/_core/src/npysort/quicksort_generic.hpp new file mode 100644 index 000000000000..31ceee55cd40 --- /dev/null +++ b/numpy/_core/src/npysort/quicksort_generic.hpp @@ -0,0 +1,98 @@ +#ifndef NUMPY_SRC_COMMON_NPYSORT_QUICKSORT_HPP +#define NUMPY_SRC_COMMON_NPYSORT_QUICKSORT_HPP + +#include "heapsort.hpp" +#include "common.hpp" + +namespace np::sort { + +// pushing largest partition has upper bound of log2(n) space +// we store two pointers each time +constexpr size_t kQuickStack = sizeof(intptr_t) * 8 * 2; +constexpr ptrdiff_t kQuickSmall = 15; + +// NUMERIC SORTS +// ``reverse=true`` performs a descending sort using the same comparator, +// preserving NaN-at-end semantics for floating-point inputs. 
+template +inline void Quick(T *start, SSize num) +{ + T vp; + T *pl = start; + T *pr = pl + num - 1; + T *stack[kQuickStack]; + T **sptr = stack; + T *pm, *pi, *pj, *pk; + int depth[kQuickStack]; + int *psdepth = depth; + int cdepth = BitScanReverse(static_cast>(num)) * 2; + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + Heap(pl, pr - pl + 1); + goto stack_pop; + } + while ((pr - pl) > kQuickSmall) { + // quicksort partition + pm = pl + ((pr - pl) >> 1); + if (Cmp(*pm, *pl)) { + std::swap(*pm, *pl); + } + if (Cmp(*pr, *pm)) { + std::swap(*pr, *pm); + } + if (Cmp(*pm, *pl)) { + std::swap(*pm, *pl); + } + vp = *pm; + pi = pl; + pj = pr - 1; + std::swap(*pm, *pj); + for (;;) { + do { + ++pi; + } while (Cmp(*pi, vp)); + do { + --pj; + } while (Cmp(vp, *pj)); + if (pi >= pj) { + break; + } + std::swap(*pi, *pj); + } + pk = pr - 1; + std::swap(*pi, *pk); + // push largest partition on stack + if (pi - pl < pr - pi) { + *sptr++ = pi + 1; + *sptr++ = pr; + pr = pi - 1; + } + else { + *sptr++ = pl; + *sptr++ = pi - 1; + pl = pi + 1; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + 1; pi <= pr; ++pi) { + vp = *pi; + pj = pi; + pk = pi - 1; + while (pj > pl && Cmp(vp, *pk)) { + *pj-- = *pk--; + } + *pj = vp; + } + stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } +} +} // np::sort +#endif // NUMPY_SRC_COMMON_NPYSORT_QUICK_HPP diff --git a/numpy/_core/src/npysort/radixsort.cpp b/numpy/_core/src/npysort/radixsort.hpp similarity index 63% rename from numpy/_core/src/npysort/radixsort.cpp rename to numpy/_core/src/npysort/radixsort.hpp index 0e1a41c69cbe..ce05a19a766e 100644 --- a/numpy/_core/src/npysort/radixsort.cpp +++ b/numpy/_core/src/npysort/radixsort.hpp @@ -139,6 +139,14 @@ radixsort(void *start, npy_intp num) return radixsort_((UT *)start, num); } +// ``PyArray_SortFunc``-shaped trampoline. 
+template +static int +radixsort_impl(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return radixsort(start, num); +} + template static npy_intp * aradixsort0(UT *start, npy_intp *aux, npy_intp *tosort, npy_intp num) @@ -236,121 +244,10 @@ aradixsort(void *start, npy_intp *tosort, npy_intp num) return aradixsort_((UT *)start, tosort, num); } -extern "C" { -NPY_NO_EXPORT int -radixsort_bool(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return radixsort(vec, cnt); -} -NPY_NO_EXPORT int -radixsort_byte(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return radixsort(vec, cnt); -} -NPY_NO_EXPORT int -radixsort_ubyte(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return radixsort(vec, cnt); -} -NPY_NO_EXPORT int -radixsort_short(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return radixsort(vec, cnt); -} -NPY_NO_EXPORT int -radixsort_ushort(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return radixsort(vec, cnt); -} -NPY_NO_EXPORT int -radixsort_int(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return radixsort(vec, cnt); -} -NPY_NO_EXPORT int -radixsort_uint(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return radixsort(vec, cnt); -} -NPY_NO_EXPORT int -radixsort_long(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return radixsort(vec, cnt); -} -NPY_NO_EXPORT int -radixsort_ulong(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return radixsort(vec, cnt); -} -NPY_NO_EXPORT int -radixsort_longlong(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return radixsort(vec, cnt); -} -NPY_NO_EXPORT int -radixsort_ulonglong(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return radixsort(vec, cnt); -} -NPY_NO_EXPORT int -aradixsort_bool(void *vec, npy_intp *ind, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return aradixsort(vec, ind, cnt); -} -NPY_NO_EXPORT int -aradixsort_byte(void *vec, npy_intp *ind, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return aradixsort(vec, ind, cnt); -} -NPY_NO_EXPORT 
int -aradixsort_ubyte(void *vec, npy_intp *ind, npy_intp cnt, - void *NPY_UNUSED(null)) -{ - return aradixsort(vec, ind, cnt); -} -NPY_NO_EXPORT int -aradixsort_short(void *vec, npy_intp *ind, npy_intp cnt, - void *NPY_UNUSED(null)) -{ - return aradixsort(vec, ind, cnt); -} -NPY_NO_EXPORT int -aradixsort_ushort(void *vec, npy_intp *ind, npy_intp cnt, - void *NPY_UNUSED(null)) -{ - return aradixsort(vec, ind, cnt); -} -NPY_NO_EXPORT int -aradixsort_int(void *vec, npy_intp *ind, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return aradixsort(vec, ind, cnt); -} -NPY_NO_EXPORT int -aradixsort_uint(void *vec, npy_intp *ind, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return aradixsort(vec, ind, cnt); -} -NPY_NO_EXPORT int -aradixsort_long(void *vec, npy_intp *ind, npy_intp cnt, void *NPY_UNUSED(null)) -{ - return aradixsort(vec, ind, cnt); -} -NPY_NO_EXPORT int -aradixsort_ulong(void *vec, npy_intp *ind, npy_intp cnt, - void *NPY_UNUSED(null)) -{ - return aradixsort(vec, ind, cnt); -} -NPY_NO_EXPORT int -aradixsort_longlong(void *vec, npy_intp *ind, npy_intp cnt, - void *NPY_UNUSED(null)) -{ - return aradixsort(vec, ind, cnt); -} -NPY_NO_EXPORT int -aradixsort_ulonglong(void *vec, npy_intp *ind, npy_intp cnt, - void *NPY_UNUSED(null)) +// ``PyArray_ArgSortFunc``-shaped trampoline. +template +static int +aradixsort_impl(void *start, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) { - return aradixsort(vec, ind, cnt); -} + return aradixsort(start, tosort, num); } diff --git a/numpy/_core/src/npysort/timsort.cpp b/numpy/_core/src/npysort/timsort.cpp deleted file mode 100644 index 0dfb4d32f64a..000000000000 --- a/numpy/_core/src/npysort/timsort.cpp +++ /dev/null @@ -1,2926 +0,0 @@ -/* -*- c -*- */ - -/* - * The purpose of this module is to add faster sort functions - * that are type-specific. This is done by altering the - * function table for the builtin descriptors. 
- * - * These sorting functions are copied almost directly from numarray - * with a few modifications (complex comparisons compare the imaginary - * part if the real parts are equal, for example), and the names - * are changed. - * - * The original sorting code is due to Charles R. Harris who wrote - * it for numarray. - */ - -/* - * Quick sort is usually the fastest, but the worst case scenario can - * be slower than the merge and heap sorts. The merge sort requires - * extra memory and so for large arrays may not be useful. - * - * The merge sort is *stable*, meaning that equal components - * are unmoved from their entry versions, so it can be used to - * implement lexicographic sorting on multiple keys. - * - * The heap sort is included for completeness. - */ - -/* For details of Timsort, refer to - * https://github.com/python/cpython/blob/3.7/Objects/listsort.txt - */ - -#define NPY_NO_DEPRECATED_API NPY_API_VERSION - -#include "npy_sort.h" -#include "npysort_common.h" -#include "numpy_tag.h" - -#include -#include - -/* enough for 32 * 1.618 ** 128 elements. - If powersort was used in all cases, 90 would suffice, as 32 * 2 ** 90 >= 32 * 1.618 ** 128 */ -#define RUN_STACK_SIZE 128 - -static npy_intp -compute_min_run(npy_intp num) -{ - npy_intp r = 0; - - while (64 < num) { - r |= num & 1; - num >>= 1; - } - - return num + r; -} - -typedef struct { - npy_intp s; /* start pointer */ - npy_intp l; /* length */ - int power; /* node "level" for powersort merge strategy */ -} run; - -/* buffer for argsort. Declared here to avoid multiple declarations. 
*/ -typedef struct { - npy_intp *pw; - npy_intp size; -} buffer_intp; - -/* buffer method */ -static inline int -resize_buffer_intp(buffer_intp *buffer, npy_intp new_size) -{ - if (new_size <= buffer->size) { - return 0; - } - - npy_intp *new_pw = (npy_intp *)realloc(buffer->pw, new_size * sizeof(npy_intp)); - - buffer->size = new_size; - - if (NPY_UNLIKELY(new_pw == NULL)) { - return -NPY_ENOMEM; - } - else { - buffer->pw = new_pw; - return 0; - } -} - -/* - ***************************************************************************** - ** NUMERIC SORTS ** - ***************************************************************************** - */ - -template -struct buffer_ { - typename Tag::type *pw; - npy_intp size; -}; - -template -static inline int -resize_buffer_(buffer_ *buffer, npy_intp new_size) -{ - using type = typename Tag::type; - if (new_size <= buffer->size) { - return 0; - } - - type *new_pw = (type *)realloc(buffer->pw, new_size * sizeof(type)); - buffer->size = new_size; - - if (NPY_UNLIKELY(new_pw == NULL)) { - return -NPY_ENOMEM; - } - else { - buffer->pw = new_pw; - return 0; - } -} - -template -static npy_intp -count_run_(type *arr, npy_intp l, npy_intp num, npy_intp minrun) -{ - npy_intp sz; - type vc, *pl, *pi, *pj, *pr; - - if (NPY_UNLIKELY(num - l == 1)) { - return 1; - } - - pl = arr + l; - - /* (not strictly) ascending sequence */ - if (!Tag::less(*(pl + 1), *pl)) { - for (pi = pl + 1; pi < arr + num - 1 && !Tag::less(*(pi + 1), *pi); - ++pi) { - } - } - else { /* (strictly) descending sequence */ - for (pi = pl + 1; pi < arr + num - 1 && Tag::less(*(pi + 1), *pi); - ++pi) { - } - - for (pj = pl, pr = pi; pj < pr; ++pj, --pr) { - std::swap(*pj, *pr); - } - } - - ++pi; - sz = pi - pl; - - if (sz < minrun) { - if (l + minrun < num) { - sz = minrun; - } - else { - sz = num - l; - } - - pr = pl + sz; - - /* insertion sort */ - for (; pi < pr; ++pi) { - vc = *pi; - pj = pi; - - while (pl < pj && Tag::less(vc, *(pj - 1))) { - *pj = *(pj - 1); - --pj; 
- } - - *pj = vc; - } - } - - return sz; -} - -/* when the left part of the array (p1) is smaller, copy p1 to buffer - * and merge from left to right - */ -template -static void -merge_left_(type *p1, npy_intp l1, type *p2, npy_intp l2, type *p3) -{ - type *end = p2 + l2; - memcpy(p3, p1, sizeof(type) * l1); - /* first element must be in p2 otherwise skipped in the caller */ - *p1++ = *p2++; - - while (p1 < p2 && p2 < end) { - if (Tag::less(*p2, *p3)) { - *p1++ = *p2++; - } - else { - *p1++ = *p3++; - } - } - - if (p1 != p2) { - memcpy(p1, p3, sizeof(type) * (p2 - p1)); - } -} - -/* when the right part of the array (p2) is smaller, copy p2 to buffer - * and merge from right to left - */ -template -static void -merge_right_(type *p1, npy_intp l1, type *p2, npy_intp l2, type *p3) -{ - npy_intp ofs; - type *start = p1 - 1; - memcpy(p3, p2, sizeof(type) * l2); - p1 += l1 - 1; - p2 += l2 - 1; - p3 += l2 - 1; - /* first element must be in p1 otherwise skipped in the caller */ - *p2-- = *p1--; - - while (p1 < p2 && start < p1) { - if (Tag::less(*p3, *p1)) { - *p2-- = *p1--; - } - else { - *p2-- = *p3--; - } - } - - if (p1 != p2) { - ofs = p2 - start; - memcpy(start + 1, p3 - ofs + 1, sizeof(type) * ofs); - } -} - -/* Note: the naming convention of gallop functions are different from that of - * CPython. For example, here gallop_right means gallop from left toward right, - * whereas in CPython gallop_right means gallop - * and find the right most element among equal elements - */ -template -static npy_intp -gallop_right_(const type *arr, const npy_intp size, const type key) -{ - npy_intp last_ofs, ofs, m; - - if (Tag::less(key, arr[0])) { - return 0; - } - - last_ofs = 0; - ofs = 1; - - for (;;) { - if (size <= ofs || ofs < 0) { - ofs = size; /* arr[ofs] is never accessed */ - break; - } - - if (Tag::less(key, arr[ofs])) { - break; - } - else { - last_ofs = ofs; - /* ofs = 1, 3, 7, 15... 
*/ - ofs = (ofs << 1) + 1; - } - } - - /* now that arr[last_ofs] <= key < arr[ofs] */ - while (last_ofs + 1 < ofs) { - m = last_ofs + ((ofs - last_ofs) >> 1); - - if (Tag::less(key, arr[m])) { - ofs = m; - } - else { - last_ofs = m; - } - } - - /* now that arr[ofs-1] <= key < arr[ofs] */ - return ofs; -} - -template -static npy_intp -gallop_left_(const type *arr, const npy_intp size, const type key) -{ - npy_intp last_ofs, ofs, l, m, r; - - if (Tag::less(arr[size - 1], key)) { - return size; - } - - last_ofs = 0; - ofs = 1; - - for (;;) { - if (size <= ofs || ofs < 0) { - ofs = size; - break; - } - - if (Tag::less(arr[size - ofs - 1], key)) { - break; - } - else { - last_ofs = ofs; - ofs = (ofs << 1) + 1; - } - } - - /* now that arr[size-ofs-1] < key <= arr[size-last_ofs-1] */ - l = size - ofs - 1; - r = size - last_ofs - 1; - - while (l + 1 < r) { - m = l + ((r - l) >> 1); - - if (Tag::less(arr[m], key)) { - l = m; - } - else { - r = m; - } - } - - /* now that arr[r-1] < key <= arr[r] */ - return r; -} - -template -static int -merge_at_(type *arr, const run *stack, const npy_intp at, buffer_ *buffer) -{ - int ret; - npy_intp s1, l1, s2, l2, k; - type *p1, *p2; - s1 = stack[at].s; - l1 = stack[at].l; - s2 = stack[at + 1].s; - l2 = stack[at + 1].l; - /* arr[s2] belongs to arr[s1+k]. 
- * if try to comment this out for debugging purpose, remember - * in the merging process the first element is skipped - */ - k = gallop_right_(arr + s1, l1, arr[s2]); - - if (l1 == k) { - /* already sorted */ - return 0; - } - - p1 = arr + s1 + k; - l1 -= k; - p2 = arr + s2; - /* arr[s2-1] belongs to arr[s2+l2] */ - l2 = gallop_left_(arr + s2, l2, arr[s2 - 1]); - - if (l2 < l1) { - ret = resize_buffer_(buffer, l2); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - merge_right_(p1, l1, p2, l2, buffer->pw); - } - else { - ret = resize_buffer_(buffer, l1); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - merge_left_(p1, l1, p2, l2, buffer->pw); - } - - return 0; -} - -/* See https://github.com/python/cpython/blob/ea23c897cd25702e72a04e06664f6864f07a7c5d/Objects/listsort.txt -* for a detailed explanation. -* In CPython, *num* is called *n*, but we changed it for consistency with the NumPy implementation. -*/ -static int -powerloop(npy_intp s1, npy_intp n1, npy_intp n2, npy_intp num) -{ - int result = 0; - npy_intp a = 2 * s1 + n1; /* 2*a */ - npy_intp b = a + n1 + n2; /* 2*b */ - for (;;) { - ++result; - if (a >= num) { /* both quotient bits are 1 */ - a -= num; - b -= num; - } - else if (b >= num) { /* a/num bit is 0, b/num bit is 1 */ - break; - } - a <<= 1; - b <<= 1; - } - return result; -} - -template -static int -found_new_run_(type *arr, run *stack, npy_intp *stack_ptr, npy_intp n2, - npy_intp num, buffer_ *buffer) -{ - int ret; - if (*stack_ptr > 0) { - npy_intp s1 = stack[*stack_ptr - 1].s; - npy_intp n1 = stack[*stack_ptr - 1].l; - int power = powerloop(s1, n1, n2, num); - while (*stack_ptr > 1 && stack[*stack_ptr - 2].power > power) { - ret = merge_at_(arr, stack, *stack_ptr - 2, buffer); - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - stack[*stack_ptr - 2].l += stack[*stack_ptr - 1].l; - --(*stack_ptr); - } - stack[*stack_ptr - 1].power = power; - } - return 0; -} - -template -static int -force_collapse_(type *arr, run *stack, npy_intp 
*stack_ptr, - buffer_ *buffer) -{ - int ret; - npy_intp top = *stack_ptr; - - while (2 < top) { - if (stack[top - 3].l <= stack[top - 1].l) { - ret = merge_at_(arr, stack, top - 3, buffer); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 3].l += stack[top - 2].l; - stack[top - 2] = stack[top - 1]; - --top; - } - else { - ret = merge_at_(arr, stack, top - 2, buffer); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 2].l += stack[top - 1].l; - --top; - } - } - - if (1 < top) { - ret = merge_at_(arr, stack, top - 2, buffer); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - } - - return 0; -} - -template -static int -timsort_(void *start, npy_intp num) -{ - using type = typename Tag::type; - int ret; - npy_intp l, n, stack_ptr, minrun; - buffer_ buffer; - run stack[RUN_STACK_SIZE]; - buffer.pw = NULL; - buffer.size = 0; - stack_ptr = 0; - minrun = compute_min_run(num); - - for (l = 0; l < num;) { - n = count_run_((type *)start, l, num, minrun); - ret = found_new_run_((type *)start, stack, &stack_ptr, n, num, &buffer); - if (NPY_UNLIKELY(ret < 0)) - goto cleanup; - - // Push the new run onto the stack. 
- stack[stack_ptr].s = l; - stack[stack_ptr].l = n; - ++stack_ptr; - l += n; - } - - ret = force_collapse_((type *)start, stack, &stack_ptr, &buffer); - - if (NPY_UNLIKELY(ret < 0)) { - goto cleanup; - } - - ret = 0; -cleanup: - - free(buffer.pw); - - return ret; -} - -/* argsort */ - -template -static npy_intp -acount_run_(type *arr, npy_intp *tosort, npy_intp l, npy_intp num, - npy_intp minrun) -{ - npy_intp sz; - type vc; - npy_intp vi; - npy_intp *pl, *pi, *pj, *pr; - - if (NPY_UNLIKELY(num - l == 1)) { - return 1; - } - - pl = tosort + l; - - /* (not strictly) ascending sequence */ - if (!Tag::less(arr[*(pl + 1)], arr[*pl])) { - for (pi = pl + 1; - pi < tosort + num - 1 && !Tag::less(arr[*(pi + 1)], arr[*pi]); - ++pi) { - } - } - else { /* (strictly) descending sequence */ - for (pi = pl + 1; - pi < tosort + num - 1 && Tag::less(arr[*(pi + 1)], arr[*pi]); - ++pi) { - } - - for (pj = pl, pr = pi; pj < pr; ++pj, --pr) { - std::swap(*pj, *pr); - } - } - - ++pi; - sz = pi - pl; - - if (sz < minrun) { - if (l + minrun < num) { - sz = minrun; - } - else { - sz = num - l; - } - - pr = pl + sz; - - /* insertion sort */ - for (; pi < pr; ++pi) { - vi = *pi; - vc = arr[*pi]; - pj = pi; - - while (pl < pj && Tag::less(vc, arr[*(pj - 1)])) { - *pj = *(pj - 1); - --pj; - } - - *pj = vi; - } - } - - return sz; -} - -template -static npy_intp -agallop_right_(const type *arr, const npy_intp *tosort, const npy_intp size, - const type key) -{ - npy_intp last_ofs, ofs, m; - - if (Tag::less(key, arr[tosort[0]])) { - return 0; - } - - last_ofs = 0; - ofs = 1; - - for (;;) { - if (size <= ofs || ofs < 0) { - ofs = size; /* arr[ofs] is never accessed */ - break; - } - - if (Tag::less(key, arr[tosort[ofs]])) { - break; - } - else { - last_ofs = ofs; - /* ofs = 1, 3, 7, 15... 
*/ - ofs = (ofs << 1) + 1; - } - } - - /* now that arr[tosort[last_ofs]] <= key < arr[tosort[ofs]] */ - while (last_ofs + 1 < ofs) { - m = last_ofs + ((ofs - last_ofs) >> 1); - - if (Tag::less(key, arr[tosort[m]])) { - ofs = m; - } - else { - last_ofs = m; - } - } - - /* now that arr[tosort[ofs-1]] <= key < arr[tosort[ofs]] */ - return ofs; -} - -template -static npy_intp -agallop_left_(const type *arr, const npy_intp *tosort, const npy_intp size, - const type key) -{ - npy_intp last_ofs, ofs, l, m, r; - - if (Tag::less(arr[tosort[size - 1]], key)) { - return size; - } - - last_ofs = 0; - ofs = 1; - - for (;;) { - if (size <= ofs || ofs < 0) { - ofs = size; - break; - } - - if (Tag::less(arr[tosort[size - ofs - 1]], key)) { - break; - } - else { - last_ofs = ofs; - ofs = (ofs << 1) + 1; - } - } - - /* now that arr[tosort[size-ofs-1]] < key <= arr[tosort[size-last_ofs-1]] - */ - l = size - ofs - 1; - r = size - last_ofs - 1; - - while (l + 1 < r) { - m = l + ((r - l) >> 1); - - if (Tag::less(arr[tosort[m]], key)) { - l = m; - } - else { - r = m; - } - } - - /* now that arr[tosort[r-1]] < key <= arr[tosort[r]] */ - return r; -} - -template -static void -amerge_left_(type *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, - npy_intp *p3) -{ - npy_intp *end = p2 + l2; - memcpy(p3, p1, sizeof(npy_intp) * l1); - /* first element must be in p2 otherwise skipped in the caller */ - *p1++ = *p2++; - - while (p1 < p2 && p2 < end) { - if (Tag::less(arr[*p2], arr[*p3])) { - *p1++ = *p2++; - } - else { - *p1++ = *p3++; - } - } - - if (p1 != p2) { - memcpy(p1, p3, sizeof(npy_intp) * (p2 - p1)); - } -} - -template -static void -amerge_right_(type *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, - npy_intp *p3) -{ - npy_intp ofs; - npy_intp *start = p1 - 1; - memcpy(p3, p2, sizeof(npy_intp) * l2); - p1 += l1 - 1; - p2 += l2 - 1; - p3 += l2 - 1; - /* first element must be in p1 otherwise skipped in the caller */ - *p2-- = *p1--; - - while (p1 < p2 && start < p1) 
{ - if (Tag::less(arr[*p3], arr[*p1])) { - *p2-- = *p1--; - } - else { - *p2-- = *p3--; - } - } - - if (p1 != p2) { - ofs = p2 - start; - memcpy(start + 1, p3 - ofs + 1, sizeof(npy_intp) * ofs); - } -} - -template -static int -amerge_at_(type *arr, npy_intp *tosort, const run *stack, const npy_intp at, - buffer_intp *buffer) -{ - int ret; - npy_intp s1, l1, s2, l2, k; - npy_intp *p1, *p2; - s1 = stack[at].s; - l1 = stack[at].l; - s2 = stack[at + 1].s; - l2 = stack[at + 1].l; - /* tosort[s2] belongs to tosort[s1+k] */ - k = agallop_right_(arr, tosort + s1, l1, arr[tosort[s2]]); - - if (l1 == k) { - /* already sorted */ - return 0; - } - - p1 = tosort + s1 + k; - l1 -= k; - p2 = tosort + s2; - /* tosort[s2-1] belongs to tosort[s2+l2] */ - l2 = agallop_left_(arr, tosort + s2, l2, arr[tosort[s2 - 1]]); - - if (l2 < l1) { - ret = resize_buffer_intp(buffer, l2); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - amerge_right_(arr, p1, l1, p2, l2, buffer->pw); - } - else { - ret = resize_buffer_intp(buffer, l1); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - amerge_left_(arr, p1, l1, p2, l2, buffer->pw); - } - - return 0; -} - -template -static int -afound_new_run_(type *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, npy_intp n2, - npy_intp num, buffer_intp *buffer) -{ - int ret; - if (*stack_ptr > 0) { - npy_intp s1 = stack[*stack_ptr - 1].s; - npy_intp n1 = stack[*stack_ptr - 1].l; - int power = powerloop(s1, n1, n2, num); - while (*stack_ptr > 1 && stack[*stack_ptr - 2].power > power) { - ret = amerge_at_(arr, tosort, stack, *stack_ptr - 2, buffer); - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - stack[*stack_ptr - 2].l += stack[*stack_ptr - 1].l; - --(*stack_ptr); - } - stack[*stack_ptr - 1].power = power; - } - return 0; -} - -template -static int -aforce_collapse_(type *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, - buffer_intp *buffer) -{ - int ret; - npy_intp top = *stack_ptr; - - while (2 < top) { - if (stack[top - 3].l <= 
stack[top - 1].l) { - ret = amerge_at_(arr, tosort, stack, top - 3, buffer); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 3].l += stack[top - 2].l; - stack[top - 2] = stack[top - 1]; - --top; - } - else { - ret = amerge_at_(arr, tosort, stack, top - 2, buffer); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 2].l += stack[top - 1].l; - --top; - } - } - - if (1 < top) { - ret = amerge_at_(arr, tosort, stack, top - 2, buffer); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - } - - return 0; -} - -template -static int -atimsort_(void *v, npy_intp *tosort, npy_intp num) -{ - using type = typename Tag::type; - int ret; - npy_intp l, n, stack_ptr, minrun; - buffer_intp buffer; - run stack[RUN_STACK_SIZE]; - buffer.pw = NULL; - buffer.size = 0; - stack_ptr = 0; - minrun = compute_min_run(num); - - for (l = 0; l < num;) { - n = acount_run_((type *)v, tosort, l, num, minrun); - ret = afound_new_run_((type*)v, tosort, stack, &stack_ptr, n, num, &buffer); - if (NPY_UNLIKELY(ret < 0)) { - goto cleanup; - } - stack[stack_ptr].s = l; - stack[stack_ptr].l = n; - ++stack_ptr; - l += n; - } - - ret = aforce_collapse_((type *)v, tosort, stack, &stack_ptr, &buffer); - - if (NPY_UNLIKELY(ret < 0)) { - goto cleanup; - } - - ret = 0; -cleanup: - - if (buffer.pw != NULL) { - free(buffer.pw); - } - - return ret; -} - -/* For string sorts and generic sort, element comparisons are very expensive, - * and the time cost of insertion sort (involves N**2 comparison) clearly - * hurts. Implementing binary insertion sort and probably gallop mode during - * merging process can hopefully boost the performance. Here as a temporary - * workaround we use shorter run length to reduce the cost of insertion sort. 
- */ - -static npy_intp -compute_min_run_short(npy_intp num) -{ - npy_intp r = 0; - - while (16 < num) { - r |= num & 1; - num >>= 1; - } - - return num + r; -} - -/* - ***************************************************************************** - ** STRING SORTS ** - ***************************************************************************** - */ - -template -struct string_buffer_ { - typename Tag::type *pw; - npy_intp size; - size_t len; -}; - -template -static inline int -resize_buffer_(string_buffer_ *buffer, npy_intp new_size) -{ - using type = typename Tag::type; - if (new_size <= buffer->size) { - return 0; - } - - type *new_pw = (type *)realloc(buffer->pw, sizeof(type) * new_size * buffer->len); - buffer->size = new_size; - - if (NPY_UNLIKELY(new_pw == NULL)) { - return -NPY_ENOMEM; - } - else { - buffer->pw = new_pw; - return 0; - } -} - -template -static npy_intp -count_run_(type *arr, npy_intp l, npy_intp num, npy_intp minrun, type *vp, - size_t len) -{ - npy_intp sz; - type *pl, *pi, *pj, *pr; - - if (NPY_UNLIKELY(num - l == 1)) { - return 1; - } - - pl = arr + l * len; - - /* (not strictly) ascending sequence */ - if (!Tag::less(pl + len, pl, len)) { - for (pi = pl + len; - pi < arr + (num - 1) * len && !Tag::less(pi + len, pi, len); - pi += len) { - } - } - else { /* (strictly) descending sequence */ - for (pi = pl + len; - pi < arr + (num - 1) * len && Tag::less(pi + len, pi, len); - pi += len) { - } - - for (pj = pl, pr = pi; pj < pr; pj += len, pr -= len) { - Tag::swap(pj, pr, len); - } - } - - pi += len; - sz = (pi - pl) / len; - - if (sz < minrun) { - if (l + minrun < num) { - sz = minrun; - } - else { - sz = num - l; - } - - pr = pl + sz * len; - - /* insertion sort */ - for (; pi < pr; pi += len) { - Tag::copy(vp, pi, len); - pj = pi; - - while (pl < pj && Tag::less(vp, pj - len, len)) { - Tag::copy(pj, pj - len, len); - pj -= len; - } - - Tag::copy(pj, vp, len); - } - } - - return sz; -} - -template -static npy_intp -gallop_right_(const 
typename Tag::type *arr, const npy_intp size, - const typename Tag::type *key, size_t len) -{ - npy_intp last_ofs, ofs, m; - - if (Tag::less(key, arr, len)) { - return 0; - } - - last_ofs = 0; - ofs = 1; - - for (;;) { - if (size <= ofs || ofs < 0) { - ofs = size; /* arr[ofs] is never accessed */ - break; - } - - if (Tag::less(key, arr + ofs * len, len)) { - break; - } - else { - last_ofs = ofs; - /* ofs = 1, 3, 7, 15... */ - ofs = (ofs << 1) + 1; - } - } - - /* now that arr[last_ofs*len] <= key < arr[ofs*len] */ - while (last_ofs + 1 < ofs) { - m = last_ofs + ((ofs - last_ofs) >> 1); - - if (Tag::less(key, arr + m * len, len)) { - ofs = m; - } - else { - last_ofs = m; - } - } - - /* now that arr[(ofs-1)*len] <= key < arr[ofs*len] */ - return ofs; -} - -template -static npy_intp -gallop_left_(const typename Tag::type *arr, const npy_intp size, - const typename Tag::type *key, size_t len) -{ - npy_intp last_ofs, ofs, l, m, r; - - if (Tag::less(arr + (size - 1) * len, key, len)) { - return size; - } - - last_ofs = 0; - ofs = 1; - - for (;;) { - if (size <= ofs || ofs < 0) { - ofs = size; - break; - } - - if (Tag::less(arr + (size - ofs - 1) * len, key, len)) { - break; - } - else { - last_ofs = ofs; - ofs = (ofs << 1) + 1; - } - } - - /* now that arr[(size-ofs-1)*len] < key <= arr[(size-last_ofs-1)*len] */ - l = size - ofs - 1; - r = size - last_ofs - 1; - - while (l + 1 < r) { - m = l + ((r - l) >> 1); - - if (Tag::less(arr + m * len, key, len)) { - l = m; - } - else { - r = m; - } - } - - /* now that arr[(r-1)*len] < key <= arr[r*len] */ - return r; -} - -template -static void -merge_left_(typename Tag::type *p1, npy_intp l1, typename Tag::type *p2, - npy_intp l2, typename Tag::type *p3, size_t len) -{ - using type = typename Tag::type; - type *end = p2 + l2 * len; - memcpy(p3, p1, sizeof(type) * l1 * len); - /* first element must be in p2 otherwise skipped in the caller */ - Tag::copy(p1, p2, len); - p1 += len; - p2 += len; - - while (p1 < p2 && p2 < end) { - if 
(Tag::less(p2, p3, len)) { - Tag::copy(p1, p2, len); - p1 += len; - p2 += len; - } - else { - Tag::copy(p1, p3, len); - p1 += len; - p3 += len; - } - } - - if (p1 != p2) { - memcpy(p1, p3, sizeof(type) * (p2 - p1)); - } -} - -template -static void -merge_right_(type *p1, npy_intp l1, type *p2, npy_intp l2, type *p3, - size_t len) -{ - npy_intp ofs; - type *start = p1 - len; - memcpy(p3, p2, sizeof(type) * l2 * len); - p1 += (l1 - 1) * len; - p2 += (l2 - 1) * len; - p3 += (l2 - 1) * len; - /* first element must be in p1 otherwise skipped in the caller */ - Tag::copy(p2, p1, len); - p2 -= len; - p1 -= len; - - while (p1 < p2 && start < p1) { - if (Tag::less(p3, p1, len)) { - Tag::copy(p2, p1, len); - p2 -= len; - p1 -= len; - } - else { - Tag::copy(p2, p3, len); - p2 -= len; - p3 -= len; - } - } - - if (p1 != p2) { - ofs = p2 - start; - memcpy(start + len, p3 - ofs + len, sizeof(type) * ofs); - } -} - -template -static int -merge_at_(type *arr, const run *stack, const npy_intp at, - string_buffer_ *buffer, size_t len) -{ - int ret; - npy_intp s1, l1, s2, l2, k; - type *p1, *p2; - s1 = stack[at].s; - l1 = stack[at].l; - s2 = stack[at + 1].s; - l2 = stack[at + 1].l; - /* arr[s2] belongs to arr[s1+k] */ - Tag::copy(buffer->pw, arr + s2 * len, len); - k = gallop_right_(arr + s1 * len, l1, buffer->pw, len); - - if (l1 == k) { - /* already sorted */ - return 0; - } - - p1 = arr + (s1 + k) * len; - l1 -= k; - p2 = arr + s2 * len; - /* arr[s2-1] belongs to arr[s2+l2] */ - Tag::copy(buffer->pw, arr + (s2 - 1) * len, len); - l2 = gallop_left_(arr + s2 * len, l2, buffer->pw, len); - - if (l2 < l1) { - ret = resize_buffer_(buffer, l2); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - merge_right_(p1, l1, p2, l2, buffer->pw, len); - } - else { - ret = resize_buffer_(buffer, l1); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - merge_left_(p1, l1, p2, l2, buffer->pw, len); - } - - return 0; -} - -template -static int -try_collapse_(type *arr, run *stack, npy_intp 
*stack_ptr, - string_buffer_ *buffer, size_t len) -{ - int ret; - npy_intp A, B, C, top; - top = *stack_ptr; - - while (1 < top) { - B = stack[top - 2].l; - C = stack[top - 1].l; - - if ((2 < top && stack[top - 3].l <= B + C) || - (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { - A = stack[top - 3].l; - - if (A <= C) { - ret = merge_at_(arr, stack, top - 3, buffer, len); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 3].l += B; - stack[top - 2] = stack[top - 1]; - --top; - } - else { - ret = merge_at_(arr, stack, top - 2, buffer, len); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 2].l += C; - --top; - } - } - else if (1 < top && B <= C) { - ret = merge_at_(arr, stack, top - 2, buffer, len); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 2].l += C; - --top; - } - else { - break; - } - } - - *stack_ptr = top; - return 0; -} - -template -static int -force_collapse_(type *arr, run *stack, npy_intp *stack_ptr, - string_buffer_ *buffer, size_t len) -{ - int ret; - npy_intp top = *stack_ptr; - - while (2 < top) { - if (stack[top - 3].l <= stack[top - 1].l) { - ret = merge_at_(arr, stack, top - 3, buffer, len); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 3].l += stack[top - 2].l; - stack[top - 2] = stack[top - 1]; - --top; - } - else { - ret = merge_at_(arr, stack, top - 2, buffer, len); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 2].l += stack[top - 1].l; - --top; - } - } - - if (1 < top) { - ret = merge_at_(arr, stack, top - 2, buffer, len); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - } - - return 0; -} - -template -NPY_NO_EXPORT int -string_timsort_(void *start, npy_intp num, void *varr) -{ - using type = typename Tag::type; - PyArrayObject *arr = reinterpret_cast(varr); - size_t elsize = PyArray_ITEMSIZE(arr); - size_t len = elsize / sizeof(type); - int ret; - npy_intp l, n, stack_ptr, minrun; - run stack[RUN_STACK_SIZE]; - string_buffer_ 
buffer; - - /* Items that have zero size don't make sense to sort */ - if (len == 0) { - return 0; - } - - buffer.pw = NULL; - buffer.size = 0; - buffer.len = len; - stack_ptr = 0; - minrun = compute_min_run_short(num); - /* used for insertion sort and gallop key */ - ret = resize_buffer_(&buffer, 1); - - if (NPY_UNLIKELY(ret < 0)) { - goto cleanup; - } - - for (l = 0; l < num;) { - n = count_run_((type *)start, l, num, minrun, buffer.pw, len); - /* both s and l are scaled by len */ - stack[stack_ptr].s = l; - stack[stack_ptr].l = n; - ++stack_ptr; - ret = try_collapse_((type *)start, stack, &stack_ptr, &buffer, - len); - - if (NPY_UNLIKELY(ret < 0)) { - goto cleanup; - } - - l += n; - } - - ret = force_collapse_((type *)start, stack, &stack_ptr, &buffer, len); - - if (NPY_UNLIKELY(ret < 0)) { - goto cleanup; - } - - ret = 0; - -cleanup: - if (buffer.pw != NULL) { - free(buffer.pw); - } - return ret; -} - -/* argsort */ - -template -static npy_intp -acount_run_(type *arr, npy_intp *tosort, npy_intp l, npy_intp num, - npy_intp minrun, size_t len) -{ - npy_intp sz; - npy_intp vi; - npy_intp *pl, *pi, *pj, *pr; - - if (NPY_UNLIKELY(num - l == 1)) { - return 1; - } - - pl = tosort + l; - - /* (not strictly) ascending sequence */ - if (!Tag::less(arr + (*(pl + 1)) * len, arr + (*pl) * len, len)) { - for (pi = pl + 1; - pi < tosort + num - 1 && - !Tag::less(arr + (*(pi + 1)) * len, arr + (*pi) * len, len); - ++pi) { - } - } - else { /* (strictly) descending sequence */ - for (pi = pl + 1; - pi < tosort + num - 1 && - Tag::less(arr + (*(pi + 1)) * len, arr + (*pi) * len, len); - ++pi) { - } - - for (pj = pl, pr = pi; pj < pr; ++pj, --pr) { - std::swap(*pj, *pr); - } - } - - ++pi; - sz = pi - pl; - - if (sz < minrun) { - if (l + minrun < num) { - sz = minrun; - } - else { - sz = num - l; - } - - pr = pl + sz; - - /* insertion sort */ - for (; pi < pr; ++pi) { - vi = *pi; - pj = pi; - - while (pl < pj && - Tag::less(arr + vi * len, arr + (*(pj - 1)) * len, len)) { - *pj = 
*(pj - 1); - --pj; - } - - *pj = vi; - } - } - - return sz; -} - -template -static npy_intp -agallop_left_(const type *arr, const npy_intp *tosort, const npy_intp size, - const type *key, size_t len) -{ - npy_intp last_ofs, ofs, l, m, r; - - if (Tag::less(arr + tosort[size - 1] * len, key, len)) { - return size; - } - - last_ofs = 0; - ofs = 1; - - for (;;) { - if (size <= ofs || ofs < 0) { - ofs = size; - break; - } - - if (Tag::less(arr + tosort[size - ofs - 1] * len, key, len)) { - break; - } - else { - last_ofs = ofs; - ofs = (ofs << 1) + 1; - } - } - - /* now that arr[tosort[size-ofs-1]*len] < key <= - * arr[tosort[size-last_ofs-1]*len] */ - l = size - ofs - 1; - r = size - last_ofs - 1; - - while (l + 1 < r) { - m = l + ((r - l) >> 1); - - if (Tag::less(arr + tosort[m] * len, key, len)) { - l = m; - } - else { - r = m; - } - } - - /* now that arr[tosort[r-1]*len] < key <= arr[tosort[r]*len] */ - return r; -} - -template -static npy_intp -agallop_right_(const type *arr, const npy_intp *tosort, const npy_intp size, - const type *key, size_t len) -{ - npy_intp last_ofs, ofs, m; - - if (Tag::less(key, arr + tosort[0] * len, len)) { - return 0; - } - - last_ofs = 0; - ofs = 1; - - for (;;) { - if (size <= ofs || ofs < 0) { - ofs = size; /* arr[ofs] is never accessed */ - break; - } - - if (Tag::less(key, arr + tosort[ofs] * len, len)) { - break; - } - else { - last_ofs = ofs; - /* ofs = 1, 3, 7, 15... 
*/ - ofs = (ofs << 1) + 1; - } - } - - /* now that arr[tosort[last_ofs]*len] <= key < arr[tosort[ofs]*len] */ - while (last_ofs + 1 < ofs) { - m = last_ofs + ((ofs - last_ofs) >> 1); - - if (Tag::less(key, arr + tosort[m] * len, len)) { - ofs = m; - } - else { - last_ofs = m; - } - } - - /* now that arr[tosort[ofs-1]*len] <= key < arr[tosort[ofs]*len] */ - return ofs; -} - -template -static void -amerge_left_(type *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, - npy_intp *p3, size_t len) -{ - npy_intp *end = p2 + l2; - memcpy(p3, p1, sizeof(npy_intp) * l1); - /* first element must be in p2 otherwise skipped in the caller */ - *p1++ = *p2++; - - while (p1 < p2 && p2 < end) { - if (Tag::less(arr + (*p2) * len, arr + (*p3) * len, len)) { - *p1++ = *p2++; - } - else { - *p1++ = *p3++; - } - } - - if (p1 != p2) { - memcpy(p1, p3, sizeof(npy_intp) * (p2 - p1)); - } -} - -template -static void -amerge_right_(type *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, - npy_intp *p3, size_t len) -{ - npy_intp ofs; - npy_intp *start = p1 - 1; - memcpy(p3, p2, sizeof(npy_intp) * l2); - p1 += l1 - 1; - p2 += l2 - 1; - p3 += l2 - 1; - /* first element must be in p1 otherwise skipped in the caller */ - *p2-- = *p1--; - - while (p1 < p2 && start < p1) { - if (Tag::less(arr + (*p3) * len, arr + (*p1) * len, len)) { - *p2-- = *p1--; - } - else { - *p2-- = *p3--; - } - } - - if (p1 != p2) { - ofs = p2 - start; - memcpy(start + 1, p3 - ofs + 1, sizeof(npy_intp) * ofs); - } -} - -template -static int -amerge_at_(type *arr, npy_intp *tosort, const run *stack, const npy_intp at, - buffer_intp *buffer, size_t len) -{ - int ret; - npy_intp s1, l1, s2, l2, k; - npy_intp *p1, *p2; - s1 = stack[at].s; - l1 = stack[at].l; - s2 = stack[at + 1].s; - l2 = stack[at + 1].l; - /* tosort[s2] belongs to tosort[s1+k] */ - k = agallop_right_(arr, tosort + s1, l1, arr + tosort[s2] * len, len); - - if (l1 == k) { - /* already sorted */ - return 0; - } - - p1 = tosort + s1 + k; - l1 -= 
k; - p2 = tosort + s2; - /* tosort[s2-1] belongs to tosort[s2+l2] */ - l2 = agallop_left_(arr, tosort + s2, l2, arr + tosort[s2 - 1] * len, - len); - - if (l2 < l1) { - ret = resize_buffer_intp(buffer, l2); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - amerge_right_(arr, p1, l1, p2, l2, buffer->pw, len); - } - else { - ret = resize_buffer_intp(buffer, l1); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - amerge_left_(arr, p1, l1, p2, l2, buffer->pw, len); - } - - return 0; -} - -template -static int -atry_collapse_(type *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, - buffer_intp *buffer, size_t len) -{ - int ret; - npy_intp A, B, C, top; - top = *stack_ptr; - - while (1 < top) { - B = stack[top - 2].l; - C = stack[top - 1].l; - - if ((2 < top && stack[top - 3].l <= B + C) || - (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { - A = stack[top - 3].l; - - if (A <= C) { - ret = amerge_at_(arr, tosort, stack, top - 3, buffer, - len); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 3].l += B; - stack[top - 2] = stack[top - 1]; - --top; - } - else { - ret = amerge_at_(arr, tosort, stack, top - 2, buffer, - len); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 2].l += C; - --top; - } - } - else if (1 < top && B <= C) { - ret = amerge_at_(arr, tosort, stack, top - 2, buffer, len); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 2].l += C; - --top; - } - else { - break; - } - } - - *stack_ptr = top; - return 0; -} - -template -static int -aforce_collapse_(type *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, - buffer_intp *buffer, size_t len) -{ - int ret; - npy_intp top = *stack_ptr; - - while (2 < top) { - if (stack[top - 3].l <= stack[top - 1].l) { - ret = amerge_at_(arr, tosort, stack, top - 3, buffer, len); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 3].l += stack[top - 2].l; - stack[top - 2] = stack[top - 1]; - --top; - } - else { - ret = 
amerge_at_(arr, tosort, stack, top - 2, buffer, len); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 2].l += stack[top - 1].l; - --top; - } - } - - if (1 < top) { - ret = amerge_at_(arr, tosort, stack, top - 2, buffer, len); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - } - - return 0; -} - -template -NPY_NO_EXPORT int -string_atimsort_(void *start, npy_intp *tosort, npy_intp num, void *varr) -{ - using type = typename Tag::type; - PyArrayObject *arr = reinterpret_cast(varr); - size_t elsize = PyArray_ITEMSIZE(arr); - size_t len = elsize / sizeof(type); - int ret; - npy_intp l, n, stack_ptr, minrun; - run stack[RUN_STACK_SIZE]; - buffer_intp buffer; - - /* Items that have zero size don't make sense to sort */ - if (len == 0) { - return 0; - } - - buffer.pw = NULL; - buffer.size = 0; - stack_ptr = 0; - minrun = compute_min_run_short(num); - - for (l = 0; l < num;) { - n = acount_run_((type *)start, tosort, l, num, minrun, len); - /* both s and l are scaled by len */ - stack[stack_ptr].s = l; - stack[stack_ptr].l = n; - ++stack_ptr; - ret = atry_collapse_((type *)start, tosort, stack, &stack_ptr, - &buffer, len); - - if (NPY_UNLIKELY(ret < 0)) { - goto cleanup; - } - - l += n; - } - - ret = aforce_collapse_((type *)start, tosort, stack, &stack_ptr, - &buffer, len); - - if (NPY_UNLIKELY(ret < 0)) { - goto cleanup; - } - - ret = 0; - -cleanup: - if (buffer.pw != NULL) { - free(buffer.pw); - } - return ret; -} - -/* - ***************************************************************************** - ** GENERIC SORT ** - ***************************************************************************** - */ - -typedef struct { - char *pw; - npy_intp size; - size_t len; -} buffer_char; - -static inline int -resize_buffer_char(buffer_char *buffer, npy_intp new_size) -{ - if (new_size <= buffer->size) { - return 0; - } - - char *new_pw = (char *)realloc(buffer->pw, sizeof(char) * new_size * buffer->len); - buffer->size = new_size; - - if 
(NPY_UNLIKELY(new_pw == NULL)) { - return -NPY_ENOMEM; - } - else { - buffer->pw = new_pw; - return 0; - } -} - -static npy_intp -npy_count_run(char *arr, npy_intp l, npy_intp num, npy_intp minrun, char *vp, - size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) -{ - npy_intp sz; - char *pl, *pi, *pj, *pr; - - if (NPY_UNLIKELY(num - l == 1)) { - return 1; - } - - pl = arr + l * len; - - /* (not strictly) ascending sequence */ - if (cmp(pl, pl + len, py_arr) <= 0) { - for (pi = pl + len; - pi < arr + (num - 1) * len && cmp(pi, pi + len, py_arr) <= 0; - pi += len) { - } - } - else { /* (strictly) descending sequence */ - for (pi = pl + len; - pi < arr + (num - 1) * len && cmp(pi + len, pi, py_arr) < 0; - pi += len) { - } - - for (pj = pl, pr = pi; pj < pr; pj += len, pr -= len) { - GENERIC_SWAP(pj, pr, len); - } - } - - pi += len; - sz = (pi - pl) / len; - - if (sz < minrun) { - if (l + minrun < num) { - sz = minrun; - } - else { - sz = num - l; - } - - pr = pl + sz * len; - - /* insertion sort */ - for (; pi < pr; pi += len) { - GENERIC_COPY(vp, pi, len); - pj = pi; - - while (pl < pj && cmp(vp, pj - len, py_arr) < 0) { - GENERIC_COPY(pj, pj - len, len); - pj -= len; - } - - GENERIC_COPY(pj, vp, len); - } - } - - return sz; -} - -static npy_intp -npy_gallop_right(const char *arr, const npy_intp size, const char *key, - size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) -{ - npy_intp last_ofs, ofs, m; - - if (cmp(key, arr, py_arr) < 0) { - return 0; - } - - last_ofs = 0; - ofs = 1; - - for (;;) { - if (size <= ofs || ofs < 0) { - ofs = size; /* arr[ofs] is never accessed */ - break; - } - - if (cmp(key, arr + ofs * len, py_arr) < 0) { - break; - } - else { - last_ofs = ofs; - /* ofs = 1, 3, 7, 15... 
*/ - ofs = (ofs << 1) + 1; - } - } - - /* now that arr[last_ofs*len] <= key < arr[ofs*len] */ - while (last_ofs + 1 < ofs) { - m = last_ofs + ((ofs - last_ofs) >> 1); - - if (cmp(key, arr + m * len, py_arr) < 0) { - ofs = m; - } - else { - last_ofs = m; - } - } - - /* now that arr[(ofs-1)*len] <= key < arr[ofs*len] */ - return ofs; -} - -static npy_intp -npy_gallop_left(const char *arr, const npy_intp size, const char *key, - size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) -{ - npy_intp last_ofs, ofs, l, m, r; - - if (cmp(arr + (size - 1) * len, key, py_arr) < 0) { - return size; - } - - last_ofs = 0; - ofs = 1; - - for (;;) { - if (size <= ofs || ofs < 0) { - ofs = size; - break; - } - - if (cmp(arr + (size - ofs - 1) * len, key, py_arr) < 0) { - break; - } - else { - last_ofs = ofs; - ofs = (ofs << 1) + 1; - } - } - - /* now that arr[(size-ofs-1)*len] < key <= arr[(size-last_ofs-1)*len] */ - l = size - ofs - 1; - r = size - last_ofs - 1; - - while (l + 1 < r) { - m = l + ((r - l) >> 1); - - if (cmp(arr + m * len, key, py_arr) < 0) { - l = m; - } - else { - r = m; - } - } - - /* now that arr[(r-1)*len] < key <= arr[r*len] */ - return r; -} - -static void -npy_merge_left(char *p1, npy_intp l1, char *p2, npy_intp l2, char *p3, - size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) -{ - char *end = p2 + l2 * len; - memcpy(p3, p1, sizeof(char) * l1 * len); - /* first element must be in p2 otherwise skipped in the caller */ - GENERIC_COPY(p1, p2, len); - p1 += len; - p2 += len; - - while (p1 < p2 && p2 < end) { - if (cmp(p2, p3, py_arr) < 0) { - GENERIC_COPY(p1, p2, len); - p1 += len; - p2 += len; - } - else { - GENERIC_COPY(p1, p3, len); - p1 += len; - p3 += len; - } - } - - if (p1 != p2) { - memcpy(p1, p3, sizeof(char) * (p2 - p1)); - } -} - -static void -npy_merge_right(char *p1, npy_intp l1, char *p2, npy_intp l2, char *p3, - size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) -{ - npy_intp ofs; - char *start = p1 - len; - memcpy(p3, 
p2, sizeof(char) * l2 * len); - p1 += (l1 - 1) * len; - p2 += (l2 - 1) * len; - p3 += (l2 - 1) * len; - /* first element must be in p1 otherwise skipped in the caller */ - GENERIC_COPY(p2, p1, len); - p2 -= len; - p1 -= len; - - while (p1 < p2 && start < p1) { - if (cmp(p3, p1, py_arr) < 0) { - GENERIC_COPY(p2, p1, len); - p2 -= len; - p1 -= len; - } - else { - GENERIC_COPY(p2, p3, len); - p2 -= len; - p3 -= len; - } - } - - if (p1 != p2) { - ofs = p2 - start; - memcpy(start + len, p3 - ofs + len, sizeof(char) * ofs); - } -} - -static int -npy_merge_at(char *arr, const run *stack, const npy_intp at, - buffer_char *buffer, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) -{ - int ret; - npy_intp s1, l1, s2, l2, k; - char *p1, *p2; - s1 = stack[at].s; - l1 = stack[at].l; - s2 = stack[at + 1].s; - l2 = stack[at + 1].l; - /* arr[s2] belongs to arr[s1+k] */ - GENERIC_COPY(buffer->pw, arr + s2 * len, len); - k = npy_gallop_right(arr + s1 * len, l1, buffer->pw, len, cmp, py_arr); - - if (l1 == k) { - /* already sorted */ - return 0; - } - - p1 = arr + (s1 + k) * len; - l1 -= k; - p2 = arr + s2 * len; - /* arr[s2-1] belongs to arr[s2+l2] */ - GENERIC_COPY(buffer->pw, arr + (s2 - 1) * len, len); - l2 = npy_gallop_left(arr + s2 * len, l2, buffer->pw, len, cmp, py_arr); - - if (l2 < l1) { - ret = resize_buffer_char(buffer, l2); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - npy_merge_right(p1, l1, p2, l2, buffer->pw, len, cmp, py_arr); - } - else { - ret = resize_buffer_char(buffer, l1); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - npy_merge_left(p1, l1, p2, l2, buffer->pw, len, cmp, py_arr); - } - - return 0; -} - -static int -npy_try_collapse(char *arr, run *stack, npy_intp *stack_ptr, - buffer_char *buffer, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) -{ - int ret; - npy_intp A, B, C, top; - top = *stack_ptr; - - while (1 < top) { - B = stack[top - 2].l; - C = stack[top - 1].l; - - if ((2 < top && stack[top - 3].l <= B + C) 
|| - (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { - A = stack[top - 3].l; - - if (A <= C) { - ret = npy_merge_at(arr, stack, top - 3, buffer, len, cmp, - py_arr); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 3].l += B; - stack[top - 2] = stack[top - 1]; - --top; - } - else { - ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, - py_arr); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 2].l += C; - --top; - } - } - else if (1 < top && B <= C) { - ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 2].l += C; - --top; - } - else { - break; - } - } - - *stack_ptr = top; - return 0; -} - -static int -npy_force_collapse(char *arr, run *stack, npy_intp *stack_ptr, - buffer_char *buffer, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) -{ - int ret; - npy_intp top = *stack_ptr; - - while (2 < top) { - if (stack[top - 3].l <= stack[top - 1].l) { - ret = npy_merge_at(arr, stack, top - 3, buffer, len, cmp, py_arr); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 3].l += stack[top - 2].l; - stack[top - 2] = stack[top - 1]; - --top; - } - else { - ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 2].l += stack[top - 1].l; - --top; - } - } - - if (1 < top) { - ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - } - - return 0; -} - -NPY_NO_EXPORT int -npy_timsort(void *start, npy_intp num, void *varr) -{ - PyArrayObject *arr = reinterpret_cast(varr); - size_t len = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; - int ret; - npy_intp l, n, stack_ptr, minrun; - run stack[RUN_STACK_SIZE]; - buffer_char buffer; - - /* Items that have zero size don't make sense to sort */ - if (len == 0) { - return 0; - } 
- - buffer.pw = NULL; - buffer.size = 0; - buffer.len = len; - stack_ptr = 0; - minrun = compute_min_run_short(num); - - /* used for insertion sort and gallop key */ - ret = resize_buffer_char(&buffer, len); - - if (NPY_UNLIKELY(ret < 0)) { - goto cleanup; - } - - for (l = 0; l < num;) { - n = npy_count_run((char *)start, l, num, minrun, buffer.pw, len, cmp, - arr); - - /* both s and l are scaled by len */ - stack[stack_ptr].s = l; - stack[stack_ptr].l = n; - ++stack_ptr; - ret = npy_try_collapse((char *)start, stack, &stack_ptr, &buffer, len, - cmp, arr); - - if (NPY_UNLIKELY(ret < 0)) { - goto cleanup; - } - - l += n; - } - - ret = npy_force_collapse((char *)start, stack, &stack_ptr, &buffer, len, - cmp, arr); - - if (NPY_UNLIKELY(ret < 0)) { - goto cleanup; - } - - ret = 0; - -cleanup: - if (buffer.pw != NULL) { - free(buffer.pw); - } - return ret; -} - -/* argsort */ - -static npy_intp -npy_acount_run(char *arr, npy_intp *tosort, npy_intp l, npy_intp num, - npy_intp minrun, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) -{ - npy_intp sz; - npy_intp vi; - npy_intp *pl, *pi, *pj, *pr; - - if (NPY_UNLIKELY(num - l == 1)) { - return 1; - } - - pl = tosort + l; - - /* (not strictly) ascending sequence */ - if (cmp(arr + (*pl) * len, arr + (*(pl + 1)) * len, py_arr) <= 0) { - for (pi = pl + 1; - pi < tosort + num - 1 && - cmp(arr + (*pi) * len, arr + (*(pi + 1)) * len, py_arr) <= 0; - ++pi) { - } - } - else { /* (strictly) descending sequence */ - for (pi = pl + 1; - pi < tosort + num - 1 && - cmp(arr + (*(pi + 1)) * len, arr + (*pi) * len, py_arr) < 0; - ++pi) { - } - - for (pj = pl, pr = pi; pj < pr; ++pj, --pr) { - std::swap(*pj, *pr); - } - } - - ++pi; - sz = pi - pl; - - if (sz < minrun) { - if (l + minrun < num) { - sz = minrun; - } - else { - sz = num - l; - } - - pr = pl + sz; - - /* insertion sort */ - for (; pi < pr; ++pi) { - vi = *pi; - pj = pi; - - while (pl < pj && - cmp(arr + vi * len, arr + (*(pj - 1)) * len, py_arr) < 0) { - *pj = 
*(pj - 1); - --pj; - } - - *pj = vi; - } - } - - return sz; -} - -static npy_intp -npy_agallop_left(const char *arr, const npy_intp *tosort, const npy_intp size, - const char *key, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) -{ - npy_intp last_ofs, ofs, l, m, r; - - if (cmp(arr + tosort[size - 1] * len, key, py_arr) < 0) { - return size; - } - - last_ofs = 0; - ofs = 1; - - for (;;) { - if (size <= ofs || ofs < 0) { - ofs = size; - break; - } - - if (cmp(arr + tosort[size - ofs - 1] * len, key, py_arr) < 0) { - break; - } - else { - last_ofs = ofs; - ofs = (ofs << 1) + 1; - } - } - - /* now that arr[tosort[size-ofs-1]*len] < key <= - * arr[tosort[size-last_ofs-1]*len] */ - l = size - ofs - 1; - r = size - last_ofs - 1; - - while (l + 1 < r) { - m = l + ((r - l) >> 1); - - if (cmp(arr + tosort[m] * len, key, py_arr) < 0) { - l = m; - } - else { - r = m; - } - } - - /* now that arr[tosort[r-1]*len] < key <= arr[tosort[r]*len] */ - return r; -} - -static npy_intp -npy_agallop_right(const char *arr, const npy_intp *tosort, const npy_intp size, - const char *key, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) -{ - npy_intp last_ofs, ofs, m; - - if (cmp(key, arr + tosort[0] * len, py_arr) < 0) { - return 0; - } - - last_ofs = 0; - ofs = 1; - - for (;;) { - if (size <= ofs || ofs < 0) { - ofs = size; /* arr[ofs] is never accessed */ - break; - } - - if (cmp(key, arr + tosort[ofs] * len, py_arr) < 0) { - break; - } - else { - last_ofs = ofs; - /* ofs = 1, 3, 7, 15... 
*/ - ofs = (ofs << 1) + 1; - } - } - - /* now that arr[tosort[last_ofs]*len] <= key < arr[tosort[ofs]*len] */ - while (last_ofs + 1 < ofs) { - m = last_ofs + ((ofs - last_ofs) >> 1); - - if (cmp(key, arr + tosort[m] * len, py_arr) < 0) { - ofs = m; - } - else { - last_ofs = m; - } - } - - /* now that arr[tosort[ofs-1]*len] <= key < arr[tosort[ofs]*len] */ - return ofs; -} - -static void -npy_amerge_left(char *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, - npy_intp l2, npy_intp *p3, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) -{ - npy_intp *end = p2 + l2; - memcpy(p3, p1, sizeof(npy_intp) * l1); - /* first element must be in p2 otherwise skipped in the caller */ - *p1++ = *p2++; - - while (p1 < p2 && p2 < end) { - if (cmp(arr + (*p2) * len, arr + (*p3) * len, py_arr) < 0) { - *p1++ = *p2++; - } - else { - *p1++ = *p3++; - } - } - - if (p1 != p2) { - memcpy(p1, p3, sizeof(npy_intp) * (p2 - p1)); - } -} - -static void -npy_amerge_right(char *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, - npy_intp l2, npy_intp *p3, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) -{ - npy_intp ofs; - npy_intp *start = p1 - 1; - memcpy(p3, p2, sizeof(npy_intp) * l2); - p1 += l1 - 1; - p2 += l2 - 1; - p3 += l2 - 1; - /* first element must be in p1 otherwise skipped in the caller */ - *p2-- = *p1--; - - while (p1 < p2 && start < p1) { - if (cmp(arr + (*p3) * len, arr + (*p1) * len, py_arr) < 0) { - *p2-- = *p1--; - } - else { - *p2-- = *p3--; - } - } - - if (p1 != p2) { - ofs = p2 - start; - memcpy(start + 1, p3 - ofs + 1, sizeof(npy_intp) * ofs); - } -} - -static int -npy_amerge_at(char *arr, npy_intp *tosort, const run *stack, const npy_intp at, - buffer_intp *buffer, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) -{ - int ret; - npy_intp s1, l1, s2, l2, k; - npy_intp *p1, *p2; - s1 = stack[at].s; - l1 = stack[at].l; - s2 = stack[at + 1].s; - l2 = stack[at + 1].l; - /* tosort[s2] belongs to tosort[s1+k] */ - k = npy_agallop_right(arr, 
tosort + s1, l1, arr + tosort[s2] * len, len, - cmp, py_arr); - - if (l1 == k) { - /* already sorted */ - return 0; - } - - p1 = tosort + s1 + k; - l1 -= k; - p2 = tosort + s2; - /* tosort[s2-1] belongs to tosort[s2+l2] */ - l2 = npy_agallop_left(arr, tosort + s2, l2, arr + tosort[s2 - 1] * len, - len, cmp, py_arr); - - if (l2 < l1) { - ret = resize_buffer_intp(buffer, l2); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - npy_amerge_right(arr, p1, l1, p2, l2, buffer->pw, len, cmp, py_arr); - } - else { - ret = resize_buffer_intp(buffer, l1); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - npy_amerge_left(arr, p1, l1, p2, l2, buffer->pw, len, cmp, py_arr); - } - - return 0; -} - -static int -npy_atry_collapse(char *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, - buffer_intp *buffer, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) -{ - int ret; - npy_intp A, B, C, top; - top = *stack_ptr; - - while (1 < top) { - B = stack[top - 2].l; - C = stack[top - 1].l; - - if ((2 < top && stack[top - 3].l <= B + C) || - (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { - A = stack[top - 3].l; - - if (A <= C) { - ret = npy_amerge_at(arr, tosort, stack, top - 3, buffer, len, - cmp, py_arr); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 3].l += B; - stack[top - 2] = stack[top - 1]; - --top; - } - else { - ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, - cmp, py_arr); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 2].l += C; - --top; - } - } - else if (1 < top && B <= C) { - ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, - py_arr); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 2].l += C; - --top; - } - else { - break; - } - } - - *stack_ptr = top; - return 0; -} - -static int -npy_aforce_collapse(char *arr, npy_intp *tosort, run *stack, - npy_intp *stack_ptr, buffer_intp *buffer, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) 
-{ - int ret; - npy_intp top = *stack_ptr; - - while (2 < top) { - if (stack[top - 3].l <= stack[top - 1].l) { - ret = npy_amerge_at(arr, tosort, stack, top - 3, buffer, len, cmp, - py_arr); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 3].l += stack[top - 2].l; - stack[top - 2] = stack[top - 1]; - --top; - } - else { - ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, - py_arr); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 2].l += stack[top - 1].l; - --top; - } - } - - if (1 < top) { - ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, - py_arr); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - } - - return 0; -} - -NPY_NO_EXPORT int -npy_atimsort(void *start, npy_intp *tosort, npy_intp num, void *varr) -{ - PyArrayObject *arr = reinterpret_cast(varr); - size_t len = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; - int ret; - npy_intp l, n, stack_ptr, minrun; - run stack[RUN_STACK_SIZE]; - buffer_intp buffer; - - /* Items that have zero size don't make sense to sort */ - if (len == 0) { - return 0; - } - - buffer.pw = NULL; - buffer.size = 0; - stack_ptr = 0; - minrun = compute_min_run_short(num); - - for (l = 0; l < num;) { - n = npy_acount_run((char *)start, tosort, l, num, minrun, len, cmp, - arr); - /* both s and l are scaled by len */ - stack[stack_ptr].s = l; - stack[stack_ptr].l = n; - ++stack_ptr; - ret = npy_atry_collapse((char *)start, tosort, stack, &stack_ptr, - &buffer, len, cmp, arr); - - if (NPY_UNLIKELY(ret < 0)) { - goto cleanup; - } - - l += n; - } - - ret = npy_aforce_collapse((char *)start, tosort, stack, &stack_ptr, - &buffer, len, cmp, arr); - - if (NPY_UNLIKELY(ret < 0)) { - goto cleanup; - } - - ret = 0; - -cleanup: - if (buffer.pw != NULL) { - free(buffer.pw); - } - return ret; -} - -/*************************************** - * C > C++ dispatch - ***************************************/ - -NPY_NO_EXPORT 
int -timsort_bool(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_byte(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_ubyte(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_short(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_ushort(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_int(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_uint(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_long(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_ulong(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_longlong(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_ulonglong(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_half(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_float(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_double(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_longdouble(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_cfloat(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_cdouble(void *start, 
npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_clongdouble(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_datetime(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_timedelta(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - return timsort_(start, num); -} -NPY_NO_EXPORT int -timsort_string(void *start, npy_intp num, void *varr) -{ - return string_timsort_(start, num, varr); -} -NPY_NO_EXPORT int -timsort_unicode(void *start, npy_intp num, void *varr) -{ - return string_timsort_(start, num, varr); -} - -NPY_NO_EXPORT int -atimsort_bool(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_byte(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_ubyte(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_short(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_ushort(void *v, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_int(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_uint(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_long(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_ulong(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int 
-atimsort_longlong(void *v, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_ulonglong(void *v, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_half(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_float(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_double(void *v, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_longdouble(void *v, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_cfloat(void *v, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_cdouble(void *v, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_clongdouble(void *v, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_datetime(void *v, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_timedelta(void *v, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) -{ - return atimsort_(v, tosort, num); -} -NPY_NO_EXPORT int -atimsort_string(void *v, npy_intp *tosort, npy_intp num, void *varr) -{ - return string_atimsort_(v, tosort, num, varr); -} -NPY_NO_EXPORT int -atimsort_unicode(void *v, npy_intp *tosort, npy_intp num, void *varr) -{ - return string_atimsort_(v, tosort, num, varr); -} diff --git a/numpy/_core/src/npysort/timsort.hpp b/numpy/_core/src/npysort/timsort.hpp new file mode 100644 index 
000000000000..efa7e23b1140 --- /dev/null +++ b/numpy/_core/src/npysort/timsort.hpp @@ -0,0 +1,1774 @@ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "npy_sort.h" +#include "npysort_common.h" +#include "numpy_tag.h" + +#include +#include + +/* enough for 32 * 1.618 ** 128 elements. + If powersort was used in all cases, 90 would suffice, as 32 * 2 ** 90 >= 32 * 1.618 ** 128 */ +#define RUN_STACK_SIZE 128 + +static npy_intp +compute_min_run(npy_intp num) +{ + npy_intp r = 0; + + while (64 < num) { + r |= num & 1; + num >>= 1; + } + + return num + r; +} + +typedef struct { + npy_intp s; /* start pointer */ + npy_intp l; /* length */ + int power; /* node "level" for powersort merge strategy */ +} run; + +/* buffer for argsort. Declared here to avoid multiple declarations. */ +typedef struct { + npy_intp *pw; + npy_intp size; +} buffer_intp; + +/* buffer method */ +static inline int +resize_buffer_intp(buffer_intp *buffer, npy_intp new_size) +{ + if (new_size <= buffer->size) { + return 0; + } + + npy_intp *new_pw = (npy_intp *)realloc(buffer->pw, new_size * sizeof(npy_intp)); + + buffer->size = new_size; + + if (NPY_UNLIKELY(new_pw == NULL)) { + return -NPY_ENOMEM; + } + else { + buffer->pw = new_pw; + return 0; + } +} + +/* + ***************************************************************************** + ** NUMERIC SORTS ** + ***************************************************************************** + */ + +template +struct buffer_ { + typename Tag::type *pw; + npy_intp size; +}; + +template +static inline int +resize_buffer_(buffer_ *buffer, npy_intp new_size) +{ + using type = typename Tag::type; + if (new_size <= buffer->size) { + return 0; + } + + type *new_pw = (type *)realloc(buffer->pw, new_size * sizeof(type)); + buffer->size = new_size; + + if (NPY_UNLIKELY(new_pw == NULL)) { + return -NPY_ENOMEM; + } + else { + buffer->pw = new_pw; + return 0; + } +} + +template +static npy_intp +count_run_(type *arr, npy_intp l, npy_intp num, npy_intp minrun) 
+{ + npy_intp sz; + type vc, *pl, *pi, *pj, *pr; + + if (NPY_UNLIKELY(num - l == 1)) { + return 1; + } + + pl = arr + l; + + /* (not strictly) ascending sequence */ + if (!npy::cmp(*(pl + 1), *pl)) { + for (pi = pl + 1; pi < arr + num - 1 && !npy::cmp(*(pi + 1), *pi); + ++pi) { + } + } + else { /* (strictly) descending sequence */ + for (pi = pl + 1; pi < arr + num - 1 && npy::cmp(*(pi + 1), *pi); + ++pi) { + } + + for (pj = pl, pr = pi; pj < pr; ++pj, --pr) { + std::swap(*pj, *pr); + } + } + + ++pi; + sz = pi - pl; + + if (sz < minrun) { + if (l + minrun < num) { + sz = minrun; + } + else { + sz = num - l; + } + + pr = pl + sz; + + /* insertion sort */ + for (; pi < pr; ++pi) { + vc = *pi; + pj = pi; + + while (pl < pj && npy::cmp(vc, *(pj - 1))) { + *pj = *(pj - 1); + --pj; + } + + *pj = vc; + } + } + + return sz; +} + +/* when the left part of the array (p1) is smaller, copy p1 to buffer + * and merge from left to right + */ +template +static void +merge_left_(type *p1, npy_intp l1, type *p2, npy_intp l2, type *p3) +{ + type *end = p2 + l2; + memcpy(p3, p1, sizeof(type) * l1); + /* first element must be in p2 otherwise skipped in the caller */ + *p1++ = *p2++; + + while (p1 < p2 && p2 < end) { + if (npy::cmp(*p2, *p3)) { + *p1++ = *p2++; + } + else { + *p1++ = *p3++; + } + } + + if (p1 != p2) { + memcpy(p1, p3, sizeof(type) * (p2 - p1)); + } +} + +/* when the right part of the array (p2) is smaller, copy p2 to buffer + * and merge from right to left + */ +template +static void +merge_right_(type *p1, npy_intp l1, type *p2, npy_intp l2, type *p3) +{ + npy_intp ofs; + type *start = p1 - 1; + memcpy(p3, p2, sizeof(type) * l2); + p1 += l1 - 1; + p2 += l2 - 1; + p3 += l2 - 1; + /* first element must be in p1 otherwise skipped in the caller */ + *p2-- = *p1--; + + while (p1 < p2 && start < p1) { + if (npy::cmp(*p3, *p1)) { + *p2-- = *p1--; + } + else { + *p2-- = *p3--; + } + } + + if (p1 != p2) { + ofs = p2 - start; + memcpy(start + 1, p3 - ofs + 1, sizeof(type) * 
ofs); + } +} + +/* Note: the naming convention of gallop functions are different from that of + * CPython. For example, here gallop_right means gallop from left toward right, + * whereas in CPython gallop_right means gallop + * and find the right most element among equal elements + */ +template +static npy_intp +gallop_right_(const type *arr, const npy_intp size, const type key) +{ + npy_intp last_ofs, ofs, m; + + if (npy::cmp(key, arr[0])) { + return 0; + } + + last_ofs = 0; + ofs = 1; + + for (;;) { + if (size <= ofs || ofs < 0) { + ofs = size; /* arr[ofs] is never accessed */ + break; + } + + if (npy::cmp(key, arr[ofs])) { + break; + } + else { + last_ofs = ofs; + /* ofs = 1, 3, 7, 15... */ + ofs = (ofs << 1) + 1; + } + } + + /* now that arr[last_ofs] <= key < arr[ofs] */ + while (last_ofs + 1 < ofs) { + m = last_ofs + ((ofs - last_ofs) >> 1); + + if (npy::cmp(key, arr[m])) { + ofs = m; + } + else { + last_ofs = m; + } + } + + /* now that arr[ofs-1] <= key < arr[ofs] */ + return ofs; +} + +template +static npy_intp +gallop_left_(const type *arr, const npy_intp size, const type key) +{ + npy_intp last_ofs, ofs, l, m, r; + + if (npy::cmp(arr[size - 1], key)) { + return size; + } + + last_ofs = 0; + ofs = 1; + + for (;;) { + if (size <= ofs || ofs < 0) { + ofs = size; + break; + } + + if (npy::cmp(arr[size - ofs - 1], key)) { + break; + } + else { + last_ofs = ofs; + ofs = (ofs << 1) + 1; + } + } + + /* now that arr[size-ofs-1] < key <= arr[size-last_ofs-1] */ + l = size - ofs - 1; + r = size - last_ofs - 1; + + while (l + 1 < r) { + m = l + ((r - l) >> 1); + + if (npy::cmp(arr[m], key)) { + l = m; + } + else { + r = m; + } + } + + /* now that arr[r-1] < key <= arr[r] */ + return r; +} + +template +static int +merge_at_(type *arr, const run *stack, const npy_intp at, buffer_ *buffer) +{ + int ret; + npy_intp s1, l1, s2, l2, k; + type *p1, *p2; + s1 = stack[at].s; + l1 = stack[at].l; + s2 = stack[at + 1].s; + l2 = stack[at + 1].l; + /* arr[s2] belongs to arr[s1+k]. 
+ * if try to comment this out for debugging purpose, remember + * in the merging process the first element is skipped + */ + k = gallop_right_(arr + s1, l1, arr[s2]); + + if (l1 == k) { + /* already sorted */ + return 0; + } + + p1 = arr + s1 + k; + l1 -= k; + p2 = arr + s2; + /* arr[s2-1] belongs to arr[s2+l2] */ + l2 = gallop_left_(arr + s2, l2, arr[s2 - 1]); + + if (l2 < l1) { + ret = resize_buffer_(buffer, l2); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + merge_right_(p1, l1, p2, l2, buffer->pw); + } + else { + ret = resize_buffer_(buffer, l1); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + merge_left_(p1, l1, p2, l2, buffer->pw); + } + + return 0; +} + +/* See https://github.com/python/cpython/blob/ea23c897cd25702e72a04e06664f6864f07a7c5d/Objects/listsort.txt +* for a detailed explanation. +* In CPython, *num* is called *n*, but we changed it for consistency with the NumPy implementation. +*/ +static int +powerloop(npy_intp s1, npy_intp n1, npy_intp n2, npy_intp num) +{ + int result = 0; + npy_intp a = 2 * s1 + n1; /* 2*a */ + npy_intp b = a + n1 + n2; /* 2*b */ + for (;;) { + ++result; + if (a >= num) { /* both quotient bits are 1 */ + a -= num; + b -= num; + } + else if (b >= num) { /* a/num bit is 0, b/num bit is 1 */ + break; + } + a <<= 1; + b <<= 1; + } + return result; +} + +template +static int +found_new_run_(type *arr, run *stack, npy_intp *stack_ptr, npy_intp n2, + npy_intp num, buffer_ *buffer) +{ + int ret; + if (*stack_ptr > 0) { + npy_intp s1 = stack[*stack_ptr - 1].s; + npy_intp n1 = stack[*stack_ptr - 1].l; + int power = powerloop(s1, n1, n2, num); + while (*stack_ptr > 1 && stack[*stack_ptr - 2].power > power) { + ret = merge_at_(arr, stack, *stack_ptr - 2, buffer); + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + stack[*stack_ptr - 2].l += stack[*stack_ptr - 1].l; + --(*stack_ptr); + } + stack[*stack_ptr - 1].power = power; + } + return 0; +} + +template +static int +force_collapse_(type *arr, run *stack, npy_intp 
*stack_ptr, + buffer_ *buffer) +{ + int ret; + npy_intp top = *stack_ptr; + + while (2 < top) { + if (stack[top - 3].l <= stack[top - 1].l) { + ret = merge_at_(arr, stack, top - 3, buffer); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 3].l += stack[top - 2].l; + stack[top - 2] = stack[top - 1]; + --top; + } + else { + ret = merge_at_(arr, stack, top - 2, buffer); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 2].l += stack[top - 1].l; + --top; + } + } + + if (1 < top) { + ret = merge_at_(arr, stack, top - 2, buffer); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + } + + return 0; +} + +template +static int +timsort_(void *start, npy_intp num) +{ + int ret; + npy_intp l, n, stack_ptr, minrun; + buffer_ buffer; + run stack[RUN_STACK_SIZE]; + buffer.pw = NULL; + buffer.size = 0; + stack_ptr = 0; + minrun = compute_min_run(num); + + for (l = 0; l < num;) { + n = count_run_((type *)start, l, num, minrun); + ret = found_new_run_((type *)start, stack, &stack_ptr, n, num, &buffer); + if (NPY_UNLIKELY(ret < 0)) + goto cleanup; + + // Push the new run onto the stack. 
+ stack[stack_ptr].s = l; + stack[stack_ptr].l = n; + ++stack_ptr; + l += n; + } + + ret = force_collapse_((type *)start, stack, &stack_ptr, &buffer); + + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } + + ret = 0; +cleanup: + + free(buffer.pw); + + return ret; +} + +/* argsort */ + +template +static npy_intp +acount_run_(type *arr, npy_intp *tosort, npy_intp l, npy_intp num, + npy_intp minrun) +{ + npy_intp sz; + type vc; + npy_intp vi; + npy_intp *pl, *pi, *pj, *pr; + + if (NPY_UNLIKELY(num - l == 1)) { + return 1; + } + + pl = tosort + l; + + /* (not strictly) ascending sequence */ + if (!npy::cmp(arr[*(pl + 1)], arr[*pl])) { + for (pi = pl + 1; + pi < tosort + num - 1 && !npy::cmp(arr[*(pi + 1)], arr[*pi]); + ++pi) { + } + } + else { /* (strictly) descending sequence */ + for (pi = pl + 1; + pi < tosort + num - 1 && npy::cmp(arr[*(pi + 1)], arr[*pi]); + ++pi) { + } + + for (pj = pl, pr = pi; pj < pr; ++pj, --pr) { + std::swap(*pj, *pr); + } + } + + ++pi; + sz = pi - pl; + + if (sz < minrun) { + if (l + minrun < num) { + sz = minrun; + } + else { + sz = num - l; + } + + pr = pl + sz; + + /* insertion sort */ + for (; pi < pr; ++pi) { + vi = *pi; + vc = arr[*pi]; + pj = pi; + + while (pl < pj && npy::cmp(vc, arr[*(pj - 1)])) { + *pj = *(pj - 1); + --pj; + } + + *pj = vi; + } + } + + return sz; +} + +template +static npy_intp +agallop_right_(const type *arr, const npy_intp *tosort, const npy_intp size, + const type key) +{ + npy_intp last_ofs, ofs, m; + + if (npy::cmp(key, arr[tosort[0]])) { + return 0; + } + + last_ofs = 0; + ofs = 1; + + for (;;) { + if (size <= ofs || ofs < 0) { + ofs = size; /* arr[ofs] is never accessed */ + break; + } + + if (npy::cmp(key, arr[tosort[ofs]])) { + break; + } + else { + last_ofs = ofs; + /* ofs = 1, 3, 7, 15... 
*/ + ofs = (ofs << 1) + 1; + } + } + + /* now that arr[tosort[last_ofs]] <= key < arr[tosort[ofs]] */ + while (last_ofs + 1 < ofs) { + m = last_ofs + ((ofs - last_ofs) >> 1); + + if (npy::cmp(key, arr[tosort[m]])) { + ofs = m; + } + else { + last_ofs = m; + } + } + + /* now that arr[tosort[ofs-1]] <= key < arr[tosort[ofs]] */ + return ofs; +} + +template +static npy_intp +agallop_left_(const type *arr, const npy_intp *tosort, const npy_intp size, + const type key) +{ + npy_intp last_ofs, ofs, l, m, r; + + if (npy::cmp(arr[tosort[size - 1]], key)) { + return size; + } + + last_ofs = 0; + ofs = 1; + + for (;;) { + if (size <= ofs || ofs < 0) { + ofs = size; + break; + } + + if (npy::cmp(arr[tosort[size - ofs - 1]], key)) { + break; + } + else { + last_ofs = ofs; + ofs = (ofs << 1) + 1; + } + } + + /* now that arr[tosort[size-ofs-1]] < key <= arr[tosort[size-last_ofs-1]] + */ + l = size - ofs - 1; + r = size - last_ofs - 1; + + while (l + 1 < r) { + m = l + ((r - l) >> 1); + + if (npy::cmp(arr[tosort[m]], key)) { + l = m; + } + else { + r = m; + } + } + + /* now that arr[tosort[r-1]] < key <= arr[tosort[r]] */ + return r; +} + +template +static void +amerge_left_(type *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, + npy_intp *p3) +{ + npy_intp *end = p2 + l2; + memcpy(p3, p1, sizeof(npy_intp) * l1); + /* first element must be in p2 otherwise skipped in the caller */ + *p1++ = *p2++; + + while (p1 < p2 && p2 < end) { + if (npy::cmp(arr[*p2], arr[*p3])) { + *p1++ = *p2++; + } + else { + *p1++ = *p3++; + } + } + + if (p1 != p2) { + memcpy(p1, p3, sizeof(npy_intp) * (p2 - p1)); + } +} + +template +static void +amerge_right_(type *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, + npy_intp *p3) +{ + npy_intp ofs; + npy_intp *start = p1 - 1; + memcpy(p3, p2, sizeof(npy_intp) * l2); + p1 += l1 - 1; + p2 += l2 - 1; + p3 += l2 - 1; + /* first element must be in p1 otherwise skipped in the caller */ + *p2-- = *p1--; + + while (p1 < p2 && start < p1) { + 
if (npy::cmp(arr[*p3], arr[*p1])) { + *p2-- = *p1--; + } + else { + *p2-- = *p3--; + } + } + + if (p1 != p2) { + ofs = p2 - start; + memcpy(start + 1, p3 - ofs + 1, sizeof(npy_intp) * ofs); + } +} + +template +static int +amerge_at_(type *arr, npy_intp *tosort, const run *stack, const npy_intp at, + buffer_intp *buffer) +{ + int ret; + npy_intp s1, l1, s2, l2, k; + npy_intp *p1, *p2; + s1 = stack[at].s; + l1 = stack[at].l; + s2 = stack[at + 1].s; + l2 = stack[at + 1].l; + /* tosort[s2] belongs to tosort[s1+k] */ + k = agallop_right_(arr, tosort + s1, l1, arr[tosort[s2]]); + + if (l1 == k) { + /* already sorted */ + return 0; + } + + p1 = tosort + s1 + k; + l1 -= k; + p2 = tosort + s2; + /* tosort[s2-1] belongs to tosort[s2+l2] */ + l2 = agallop_left_(arr, tosort + s2, l2, arr[tosort[s2 - 1]]); + + if (l2 < l1) { + ret = resize_buffer_intp(buffer, l2); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + amerge_right_(arr, p1, l1, p2, l2, buffer->pw); + } + else { + ret = resize_buffer_intp(buffer, l1); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + amerge_left_(arr, p1, l1, p2, l2, buffer->pw); + } + + return 0; +} + +template +static int +afound_new_run_(type *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, npy_intp n2, + npy_intp num, buffer_intp *buffer) +{ + int ret; + if (*stack_ptr > 0) { + npy_intp s1 = stack[*stack_ptr - 1].s; + npy_intp n1 = stack[*stack_ptr - 1].l; + int power = powerloop(s1, n1, n2, num); + while (*stack_ptr > 1 && stack[*stack_ptr - 2].power > power) { + ret = amerge_at_(arr, tosort, stack, *stack_ptr - 2, buffer); + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + stack[*stack_ptr - 2].l += stack[*stack_ptr - 1].l; + --(*stack_ptr); + } + stack[*stack_ptr - 1].power = power; + } + return 0; +} + +template +static int +aforce_collapse_(type *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, + buffer_intp *buffer) +{ + int ret; + npy_intp top = *stack_ptr; + + while (2 < top) { + if (stack[top - 3].l <= stack[top 
- 1].l) { + ret = amerge_at_(arr, tosort, stack, top - 3, buffer); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 3].l += stack[top - 2].l; + stack[top - 2] = stack[top - 1]; + --top; + } + else { + ret = amerge_at_(arr, tosort, stack, top - 2, buffer); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 2].l += stack[top - 1].l; + --top; + } + } + + if (1 < top) { + ret = amerge_at_(arr, tosort, stack, top - 2, buffer); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + } + + return 0; +} + +// ``PyArray_SortFunc``-shaped trampoline. +template +static int +timsort_impl(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_(start, num); +} + +template +static int +atimsort_(void *v, npy_intp *tosort, npy_intp num) +{ + int ret; + npy_intp l, n, stack_ptr, minrun; + buffer_intp buffer; + run stack[RUN_STACK_SIZE]; + buffer.pw = NULL; + buffer.size = 0; + stack_ptr = 0; + minrun = compute_min_run(num); + + for (l = 0; l < num;) { + n = acount_run_((type *)v, tosort, l, num, minrun); + ret = afound_new_run_((type*)v, tosort, stack, &stack_ptr, n, num, &buffer); + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } + stack[stack_ptr].s = l; + stack[stack_ptr].l = n; + ++stack_ptr; + l += n; + } + + ret = aforce_collapse_((type *)v, tosort, stack, &stack_ptr, &buffer); + + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } + + ret = 0; +cleanup: + + if (buffer.pw != NULL) { + free(buffer.pw); + } + + return ret; +} + +// ``PyArray_ArgSortFunc``-shaped trampoline. +template +static int +atimsort_impl(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) +{ + return atimsort_(v, tosort, num); +} + +/* For string sorts and generic sort, element comparisons are very expensive, + * and the time cost of insertion sort (involves N**2 comparison) clearly + * hurts. Implementing binary insertion sort and probably gallop mode during + * merging process can hopefully boost the performance. 
Here as a temporary + * workaround we use shorter run length to reduce the cost of insertion sort. + */ + +static npy_intp +compute_min_run_short(npy_intp num) +{ + npy_intp r = 0; + + while (16 < num) { + r |= num & 1; + num >>= 1; + } + + return num + r; +} + +/* + ***************************************************************************** + ** STRING SORTS ** + ***************************************************************************** + */ + +template +struct string_buffer_ { + typename Tag::type *pw; + npy_intp size; + size_t len; +}; + +template +static inline int +resize_buffer_(string_buffer_ *buffer, npy_intp new_size) +{ + using type = typename Tag::type; + if (new_size <= buffer->size) { + return 0; + } + + type *new_pw = (type *)realloc(buffer->pw, sizeof(type) * new_size * buffer->len); + buffer->size = new_size; + + if (NPY_UNLIKELY(new_pw == NULL)) { + return -NPY_ENOMEM; + } + else { + buffer->pw = new_pw; + return 0; + } +} + +template +static npy_intp +count_run_(type *arr, npy_intp l, npy_intp num, npy_intp minrun, type *vp, + size_t len) +{ + npy_intp sz; + type *pl, *pi, *pj, *pr; + + if (NPY_UNLIKELY(num - l == 1)) { + return 1; + } + + pl = arr + l * len; + + /* (not strictly) ascending sequence */ + if (!npy::cmp(pl + len, pl, len)) { + for (pi = pl + len; + pi < arr + (num - 1) * len && !npy::cmp(pi + len, pi, len); + pi += len) { + } + } + else { /* (strictly) descending sequence */ + for (pi = pl + len; + pi < arr + (num - 1) * len && npy::cmp(pi + len, pi, len); + pi += len) { + } + + for (pj = pl, pr = pi; pj < pr; pj += len, pr -= len) { + Tag::swap(pj, pr, len); + } + } + + pi += len; + sz = (pi - pl) / len; + + if (sz < minrun) { + if (l + minrun < num) { + sz = minrun; + } + else { + sz = num - l; + } + + pr = pl + sz * len; + + /* insertion sort */ + for (; pi < pr; pi += len) { + Tag::copy(vp, pi, len); + pj = pi; + + while (pl < pj && npy::cmp(vp, pj - len, len)) { + Tag::copy(pj, pj - len, len); + pj -= len; + } + + 
Tag::copy(pj, vp, len); + } + } + + return sz; +} + +template +static npy_intp +gallop_right_(const typename Tag::type *arr, const npy_intp size, + const typename Tag::type *key, size_t len) +{ + npy_intp last_ofs, ofs, m; + + if (npy::cmp(key, arr, len)) { + return 0; + } + + last_ofs = 0; + ofs = 1; + + for (;;) { + if (size <= ofs || ofs < 0) { + ofs = size; /* arr[ofs] is never accessed */ + break; + } + + if (npy::cmp(key, arr + ofs * len, len)) { + break; + } + else { + last_ofs = ofs; + /* ofs = 1, 3, 7, 15... */ + ofs = (ofs << 1) + 1; + } + } + + /* now that arr[last_ofs*len] <= key < arr[ofs*len] */ + while (last_ofs + 1 < ofs) { + m = last_ofs + ((ofs - last_ofs) >> 1); + + if (npy::cmp(key, arr + m * len, len)) { + ofs = m; + } + else { + last_ofs = m; + } + } + + /* now that arr[(ofs-1)*len] <= key < arr[ofs*len] */ + return ofs; +} + +template +static npy_intp +gallop_left_(const typename Tag::type *arr, const npy_intp size, + const typename Tag::type *key, size_t len) +{ + npy_intp last_ofs, ofs, l, m, r; + + if (npy::cmp(arr + (size - 1) * len, key, len)) { + return size; + } + + last_ofs = 0; + ofs = 1; + + for (;;) { + if (size <= ofs || ofs < 0) { + ofs = size; + break; + } + + if (npy::cmp(arr + (size - ofs - 1) * len, key, len)) { + break; + } + else { + last_ofs = ofs; + ofs = (ofs << 1) + 1; + } + } + + /* now that arr[(size-ofs-1)*len] < key <= arr[(size-last_ofs-1)*len] */ + l = size - ofs - 1; + r = size - last_ofs - 1; + + while (l + 1 < r) { + m = l + ((r - l) >> 1); + + if (npy::cmp(arr + m * len, key, len)) { + l = m; + } + else { + r = m; + } + } + + /* now that arr[(r-1)*len] < key <= arr[r*len] */ + return r; +} + +template +static void +merge_left_(typename Tag::type *p1, npy_intp l1, typename Tag::type *p2, + npy_intp l2, typename Tag::type *p3, size_t len) +{ type *end = p2 + l2 * len; + memcpy(p3, p1, sizeof(type) * l1 * len); + /* first element must be in p2 otherwise skipped in the caller */ + Tag::copy(p1, p2, len); + p1 += 
len; + p2 += len; + + while (p1 < p2 && p2 < end) { + if (npy::cmp(p2, p3, len)) { + Tag::copy(p1, p2, len); + p1 += len; + p2 += len; + } + else { + Tag::copy(p1, p3, len); + p1 += len; + p3 += len; + } + } + + if (p1 != p2) { + memcpy(p1, p3, sizeof(type) * (p2 - p1)); + } +} + +template +static void +merge_right_(type *p1, npy_intp l1, type *p2, npy_intp l2, type *p3, + size_t len) +{ + npy_intp ofs; + type *start = p1 - len; + memcpy(p3, p2, sizeof(type) * l2 * len); + p1 += (l1 - 1) * len; + p2 += (l2 - 1) * len; + p3 += (l2 - 1) * len; + /* first element must be in p1 otherwise skipped in the caller */ + Tag::copy(p2, p1, len); + p2 -= len; + p1 -= len; + + while (p1 < p2 && start < p1) { + if (npy::cmp(p3, p1, len)) { + Tag::copy(p2, p1, len); + p2 -= len; + p1 -= len; + } + else { + Tag::copy(p2, p3, len); + p2 -= len; + p3 -= len; + } + } + + if (p1 != p2) { + ofs = p2 - start; + memcpy(start + len, p3 - ofs + len, sizeof(type) * ofs); + } +} + +template +static int +merge_at_(type *arr, const run *stack, const npy_intp at, + string_buffer_ *buffer, size_t len) +{ + int ret; + npy_intp s1, l1, s2, l2, k; + type *p1, *p2; + s1 = stack[at].s; + l1 = stack[at].l; + s2 = stack[at + 1].s; + l2 = stack[at + 1].l; + /* arr[s2] belongs to arr[s1+k] */ + Tag::copy(buffer->pw, arr + s2 * len, len); + k = gallop_right_(arr + s1 * len, l1, buffer->pw, len); + + if (l1 == k) { + /* already sorted */ + return 0; + } + + p1 = arr + (s1 + k) * len; + l1 -= k; + p2 = arr + s2 * len; + /* arr[s2-1] belongs to arr[s2+l2] */ + Tag::copy(buffer->pw, arr + (s2 - 1) * len, len); + l2 = gallop_left_(arr + s2 * len, l2, buffer->pw, len); + + if (l2 < l1) { + ret = resize_buffer_(buffer, l2); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + merge_right_(p1, l1, p2, l2, buffer->pw, len); + } + else { + ret = resize_buffer_(buffer, l1); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + merge_left_(p1, l1, p2, l2, buffer->pw, len); + } + + return 0; +} + +template +static 
int +try_collapse_(type *arr, run *stack, npy_intp *stack_ptr, + string_buffer_ *buffer, size_t len) +{ + int ret; + npy_intp A, B, C, top; + top = *stack_ptr; + + while (1 < top) { + B = stack[top - 2].l; + C = stack[top - 1].l; + + if ((2 < top && stack[top - 3].l <= B + C) || + (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { + A = stack[top - 3].l; + + if (A <= C) { + ret = merge_at_(arr, stack, top - 3, buffer, len); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 3].l += B; + stack[top - 2] = stack[top - 1]; + --top; + } + else { + ret = merge_at_(arr, stack, top - 2, buffer, len); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 2].l += C; + --top; + } + } + else if (1 < top && B <= C) { + ret = merge_at_(arr, stack, top - 2, buffer, len); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 2].l += C; + --top; + } + else { + break; + } + } + + *stack_ptr = top; + return 0; +} + +template +static int +force_collapse_(type *arr, run *stack, npy_intp *stack_ptr, + string_buffer_ *buffer, size_t len) +{ + int ret; + npy_intp top = *stack_ptr; + + while (2 < top) { + if (stack[top - 3].l <= stack[top - 1].l) { + ret = merge_at_(arr, stack, top - 3, buffer, len); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 3].l += stack[top - 2].l; + stack[top - 2] = stack[top - 1]; + --top; + } + else { + ret = merge_at_(arr, stack, top - 2, buffer, len); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 2].l += stack[top - 1].l; + --top; + } + } + + if (1 < top) { + ret = merge_at_(arr, stack, top - 2, buffer, len); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + } + + return 0; +} + +template +NPY_NO_EXPORT int +string_timsort_(void *start, npy_intp num, int elsize) +{ + size_t len = elsize / sizeof(type); + int ret; + npy_intp l, n, stack_ptr, minrun; + run stack[RUN_STACK_SIZE]; + string_buffer_ buffer; + + /* Items that have zero size don't make sense to sort */ + if (len 
== 0) { + return 0; + } + + buffer.pw = NULL; + buffer.size = 0; + buffer.len = len; + stack_ptr = 0; + minrun = compute_min_run_short(num); + /* used for insertion sort and gallop key */ + ret = resize_buffer_(&buffer, 1); + + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } + + for (l = 0; l < num;) { + n = count_run_((type *)start, l, num, minrun, buffer.pw, len); + /* both s and l are scaled by len */ + stack[stack_ptr].s = l; + stack[stack_ptr].l = n; + ++stack_ptr; + ret = try_collapse_((type *)start, stack, &stack_ptr, &buffer, + len); + + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } + + l += n; + } + + ret = force_collapse_((type *)start, stack, &stack_ptr, &buffer, len); + + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } + + ret = 0; + +cleanup: + if (buffer.pw != NULL) { + free(buffer.pw); + } + return ret; +} + +/* argsort */ + +template +static npy_intp +acount_run_(type *arr, npy_intp *tosort, npy_intp l, npy_intp num, + npy_intp minrun, size_t len) +{ + npy_intp sz; + npy_intp vi; + npy_intp *pl, *pi, *pj, *pr; + + if (NPY_UNLIKELY(num - l == 1)) { + return 1; + } + + pl = tosort + l; + + /* (not strictly) ascending sequence */ + if (!npy::cmp(arr + (*(pl + 1)) * len, arr + (*pl) * len, len)) { + for (pi = pl + 1; + pi < tosort + num - 1 && + !npy::cmp(arr + (*(pi + 1)) * len, arr + (*pi) * len, len); + ++pi) { + } + } + else { /* (strictly) descending sequence */ + for (pi = pl + 1; + pi < tosort + num - 1 && + npy::cmp(arr + (*(pi + 1)) * len, arr + (*pi) * len, len); + ++pi) { + } + + for (pj = pl, pr = pi; pj < pr; ++pj, --pr) { + std::swap(*pj, *pr); + } + } + + ++pi; + sz = pi - pl; + + if (sz < minrun) { + if (l + minrun < num) { + sz = minrun; + } + else { + sz = num - l; + } + + pr = pl + sz; + + /* insertion sort */ + for (; pi < pr; ++pi) { + vi = *pi; + pj = pi; + + while (pl < pj && + npy::cmp(arr + vi * len, arr + (*(pj - 1)) * len, len)) { + *pj = *(pj - 1); + --pj; + } + + *pj = vi; + } + } + + return sz; +} + +template +static 
npy_intp +agallop_left_(const type *arr, const npy_intp *tosort, const npy_intp size, + const type *key, size_t len) +{ + npy_intp last_ofs, ofs, l, m, r; + + if (npy::cmp(arr + tosort[size - 1] * len, key, len)) { + return size; + } + + last_ofs = 0; + ofs = 1; + + for (;;) { + if (size <= ofs || ofs < 0) { + ofs = size; + break; + } + + if (npy::cmp(arr + tosort[size - ofs - 1] * len, key, len)) { + break; + } + else { + last_ofs = ofs; + ofs = (ofs << 1) + 1; + } + } + + /* now that arr[tosort[size-ofs-1]*len] < key <= + * arr[tosort[size-last_ofs-1]*len] */ + l = size - ofs - 1; + r = size - last_ofs - 1; + + while (l + 1 < r) { + m = l + ((r - l) >> 1); + + if (npy::cmp(arr + tosort[m] * len, key, len)) { + l = m; + } + else { + r = m; + } + } + + /* now that arr[tosort[r-1]*len] < key <= arr[tosort[r]*len] */ + return r; +} + +template +static npy_intp +agallop_right_(const type *arr, const npy_intp *tosort, const npy_intp size, + const type *key, size_t len) +{ + npy_intp last_ofs, ofs, m; + + if (npy::cmp(key, arr + tosort[0] * len, len)) { + return 0; + } + + last_ofs = 0; + ofs = 1; + + for (;;) { + if (size <= ofs || ofs < 0) { + ofs = size; /* arr[ofs] is never accessed */ + break; + } + + if (npy::cmp(key, arr + tosort[ofs] * len, len)) { + break; + } + else { + last_ofs = ofs; + /* ofs = 1, 3, 7, 15... 
*/ + ofs = (ofs << 1) + 1; + } + } + + /* now that arr[tosort[last_ofs]*len] <= key < arr[tosort[ofs]*len] */ + while (last_ofs + 1 < ofs) { + m = last_ofs + ((ofs - last_ofs) >> 1); + + if (npy::cmp(key, arr + tosort[m] * len, len)) { + ofs = m; + } + else { + last_ofs = m; + } + } + + /* now that arr[tosort[ofs-1]*len] <= key < arr[tosort[ofs]*len] */ + return ofs; +} + +template +static void +amerge_left_(type *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, + npy_intp *p3, size_t len) +{ + npy_intp *end = p2 + l2; + memcpy(p3, p1, sizeof(npy_intp) * l1); + /* first element must be in p2 otherwise skipped in the caller */ + *p1++ = *p2++; + + while (p1 < p2 && p2 < end) { + if (npy::cmp(arr + (*p2) * len, arr + (*p3) * len, len)) { + *p1++ = *p2++; + } + else { + *p1++ = *p3++; + } + } + + if (p1 != p2) { + memcpy(p1, p3, sizeof(npy_intp) * (p2 - p1)); + } +} + +template +static void +amerge_right_(type *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, + npy_intp *p3, size_t len) +{ + npy_intp ofs; + npy_intp *start = p1 - 1; + memcpy(p3, p2, sizeof(npy_intp) * l2); + p1 += l1 - 1; + p2 += l2 - 1; + p3 += l2 - 1; + /* first element must be in p1 otherwise skipped in the caller */ + *p2-- = *p1--; + + while (p1 < p2 && start < p1) { + if (npy::cmp(arr + (*p3) * len, arr + (*p1) * len, len)) { + *p2-- = *p1--; + } + else { + *p2-- = *p3--; + } + } + + if (p1 != p2) { + ofs = p2 - start; + memcpy(start + 1, p3 - ofs + 1, sizeof(npy_intp) * ofs); + } +} + +template +static int +amerge_at_(type *arr, npy_intp *tosort, const run *stack, const npy_intp at, + buffer_intp *buffer, size_t len) +{ + int ret; + npy_intp s1, l1, s2, l2, k; + npy_intp *p1, *p2; + s1 = stack[at].s; + l1 = stack[at].l; + s2 = stack[at + 1].s; + l2 = stack[at + 1].l; + /* tosort[s2] belongs to tosort[s1+k] */ + k = agallop_right_(arr, tosort + s1, l1, arr + tosort[s2] * len, len); + + if (l1 == k) { + /* already sorted */ + return 0; + } + + p1 = tosort + s1 + k; + l1 -= k; 
+ p2 = tosort + s2; + /* tosort[s2-1] belongs to tosort[s2+l2] */ + l2 = agallop_left_(arr, tosort + s2, l2, arr + tosort[s2 - 1] * len, + len); + + if (l2 < l1) { + ret = resize_buffer_intp(buffer, l2); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + amerge_right_(arr, p1, l1, p2, l2, buffer->pw, len); + } + else { + ret = resize_buffer_intp(buffer, l1); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + amerge_left_(arr, p1, l1, p2, l2, buffer->pw, len); + } + + return 0; +} + +template +static int +atry_collapse_(type *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, + buffer_intp *buffer, size_t len) +{ + int ret; + npy_intp A, B, C, top; + top = *stack_ptr; + + while (1 < top) { + B = stack[top - 2].l; + C = stack[top - 1].l; + + if ((2 < top && stack[top - 3].l <= B + C) || + (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { + A = stack[top - 3].l; + + if (A <= C) { + ret = amerge_at_(arr, tosort, stack, top - 3, buffer, + len); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 3].l += B; + stack[top - 2] = stack[top - 1]; + --top; + } + else { + ret = amerge_at_(arr, tosort, stack, top - 2, buffer, + len); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 2].l += C; + --top; + } + } + else if (1 < top && B <= C) { + ret = amerge_at_(arr, tosort, stack, top - 2, buffer, len); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 2].l += C; + --top; + } + else { + break; + } + } + + *stack_ptr = top; + return 0; +} + +template +static int +aforce_collapse_(type *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, + buffer_intp *buffer, size_t len) +{ + int ret; + npy_intp top = *stack_ptr; + + while (2 < top) { + if (stack[top - 3].l <= stack[top - 1].l) { + ret = amerge_at_(arr, tosort, stack, top - 3, buffer, len); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 3].l += stack[top - 2].l; + stack[top - 2] = stack[top - 1]; + --top; + } + else { + ret = 
amerge_at_(arr, tosort, stack, top - 2, buffer, len); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 2].l += stack[top - 1].l; + --top; + } + } + + if (1 < top) { + ret = amerge_at_(arr, tosort, stack, top - 2, buffer, len); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + } + + return 0; +} + +template +NPY_NO_EXPORT int +string_atimsort_(void *start, npy_intp *tosort, npy_intp num, int elsize) +{ + size_t len = elsize / sizeof(type); + int ret; + npy_intp l, n, stack_ptr, minrun; + run stack[RUN_STACK_SIZE]; + buffer_intp buffer; + + /* Items that have zero size don't make sense to sort */ + if (len == 0) { + return 0; + } + + buffer.pw = NULL; + buffer.size = 0; + stack_ptr = 0; + minrun = compute_min_run_short(num); + + for (l = 0; l < num;) { + n = acount_run_((type *)start, tosort, l, num, minrun, len); + /* both s and l are scaled by len */ + stack[stack_ptr].s = l; + stack[stack_ptr].l = n; + ++stack_ptr; + ret = atry_collapse_((type *)start, tosort, stack, &stack_ptr, + &buffer, len); + + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } + + l += n; + } + + ret = aforce_collapse_((type *)start, tosort, stack, &stack_ptr, + &buffer, len); + + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } + + ret = 0; + +cleanup: + if (buffer.pw != NULL) { + free(buffer.pw); + } + return ret; +} \ No newline at end of file diff --git a/numpy/_core/src/npysort/timsort_generic.cpp b/numpy/_core/src/npysort/timsort_generic.cpp new file mode 100644 index 000000000000..4973e8f3c341 --- /dev/null +++ b/numpy/_core/src/npysort/timsort_generic.cpp @@ -0,0 +1,985 @@ +/* -*- c -*- */ + +/* + * The purpose of this module is to add faster sort functions + * that are type-specific. This is done by altering the + * function table for the builtin descriptors. 
+ * + * These sorting functions are copied almost directly from numarray + * with a few modifications (complex comparisons compare the imaginary + * part if the real parts are equal, for example), and the names + * are changed. + * + * The original sorting code is due to Charles R. Harris who wrote + * it for numarray. + */ + +/* + * Quick sort is usually the fastest, but the worst case scenario can + * be slower than the merge and heap sorts. The merge sort requires + * extra memory and so for large arrays may not be useful. + * + * The merge sort is *stable*, meaning that equal components + * are unmoved from their entry versions, so it can be used to + * implement lexicographic sorting on multiple keys. + * + * The heap sort is included for completeness. + */ + +/* For details of Timsort, refer to + * https://github.com/python/cpython/blob/3.7/Objects/listsort.txt + */ + +#include "npy_sort.h" +#include "npysort_common.h" +#include "numpy_tag.h" + +#include +#include + +/* enough for 32 * 1.618 ** 128 elements. + If powersort was used in all cases, 90 would suffice, as 32 * 2 ** 90 >= 32 * 1.618 ** 128 */ +#define RUN_STACK_SIZE 128 + +/* For string sorts and generic sort, element comparisons are very expensive, + * and the time cost of insertion sort (involves N**2 comparison) clearly + * hurts. Implementing binary insertion sort and probably gallop mode during + * merging process can hopefully boost the performance. Here as a temporary + * workaround we use shorter run length to reduce the cost of insertion sort. + */ + +static npy_intp +compute_min_run_short(npy_intp num) +{ + npy_intp r = 0; + + while (16 < num) { + r |= num & 1; + num >>= 1; + } + + return num + r; +} + +typedef struct { + npy_intp s; /* start pointer */ + npy_intp l; /* length */ + int power; /* node "level" for powersort merge strategy */ +} run; + +/* buffer for argsort. Declared here to avoid multiple declarations. 
*/ +typedef struct { + npy_intp *pw; + npy_intp size; +} buffer_intp; + +/* buffer method */ +static inline int +resize_buffer_intp(buffer_intp *buffer, npy_intp new_size) +{ + if (new_size <= buffer->size) { + return 0; + } + + npy_intp *new_pw = (npy_intp *)realloc(buffer->pw, new_size * sizeof(npy_intp)); + + buffer->size = new_size; + + if (NPY_UNLIKELY(new_pw == NULL)) { + return -NPY_ENOMEM; + } + else { + buffer->pw = new_pw; + return 0; + } +} + +/* + ***************************************************************************** + ** GENERIC SORT ** + ***************************************************************************** + */ + +typedef struct { + char *pw; + npy_intp size; + size_t len; +} buffer_char; + +static inline int +resize_buffer_char(buffer_char *buffer, npy_intp new_size) +{ + if (new_size <= buffer->size) { + return 0; + } + + char *new_pw = (char *)realloc(buffer->pw, sizeof(char) * new_size * buffer->len); + buffer->size = new_size; + + if (NPY_UNLIKELY(new_pw == NULL)) { + return -NPY_ENOMEM; + } + else { + buffer->pw = new_pw; + return 0; + } +} + +static npy_intp +npy_count_run(char *arr, npy_intp l, npy_intp num, npy_intp minrun, char *vp, + size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) +{ + npy_intp sz; + char *pl, *pi, *pj, *pr; + + if (NPY_UNLIKELY(num - l == 1)) { + return 1; + } + + pl = arr + l * len; + + /* (not strictly) ascending sequence */ + if (cmp(pl, pl + len, py_arr) <= 0) { + for (pi = pl + len; + pi < arr + (num - 1) * len && cmp(pi, pi + len, py_arr) <= 0; + pi += len) { + } + } + else { /* (strictly) descending sequence */ + for (pi = pl + len; + pi < arr + (num - 1) * len && cmp(pi + len, pi, py_arr) < 0; + pi += len) { + } + + for (pj = pl, pr = pi; pj < pr; pj += len, pr -= len) { + GENERIC_SWAP(pj, pr, len); + } + } + + pi += len; + sz = (pi - pl) / len; + + if (sz < minrun) { + if (l + minrun < num) { + sz = minrun; + } + else { + sz = num - l; + } + + pr = pl + sz * len; + + /* insertion sort 
*/ + for (; pi < pr; pi += len) { + GENERIC_COPY(vp, pi, len); + pj = pi; + + while (pl < pj && cmp(vp, pj - len, py_arr) < 0) { + GENERIC_COPY(pj, pj - len, len); + pj -= len; + } + + GENERIC_COPY(pj, vp, len); + } + } + + return sz; +} + +static npy_intp +npy_gallop_right(const char *arr, const npy_intp size, const char *key, + size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) +{ + npy_intp last_ofs, ofs, m; + + if (cmp(key, arr, py_arr) < 0) { + return 0; + } + + last_ofs = 0; + ofs = 1; + + for (;;) { + if (size <= ofs || ofs < 0) { + ofs = size; /* arr[ofs] is never accessed */ + break; + } + + if (cmp(key, arr + ofs * len, py_arr) < 0) { + break; + } + else { + last_ofs = ofs; + /* ofs = 1, 3, 7, 15... */ + ofs = (ofs << 1) + 1; + } + } + + /* now that arr[last_ofs*len] <= key < arr[ofs*len] */ + while (last_ofs + 1 < ofs) { + m = last_ofs + ((ofs - last_ofs) >> 1); + + if (cmp(key, arr + m * len, py_arr) < 0) { + ofs = m; + } + else { + last_ofs = m; + } + } + + /* now that arr[(ofs-1)*len] <= key < arr[ofs*len] */ + return ofs; +} + +static npy_intp +npy_gallop_left(const char *arr, const npy_intp size, const char *key, + size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) +{ + npy_intp last_ofs, ofs, l, m, r; + + if (cmp(arr + (size - 1) * len, key, py_arr) < 0) { + return size; + } + + last_ofs = 0; + ofs = 1; + + for (;;) { + if (size <= ofs || ofs < 0) { + ofs = size; + break; + } + + if (cmp(arr + (size - ofs - 1) * len, key, py_arr) < 0) { + break; + } + else { + last_ofs = ofs; + ofs = (ofs << 1) + 1; + } + } + + /* now that arr[(size-ofs-1)*len] < key <= arr[(size-last_ofs-1)*len] */ + l = size - ofs - 1; + r = size - last_ofs - 1; + + while (l + 1 < r) { + m = l + ((r - l) >> 1); + + if (cmp(arr + m * len, key, py_arr) < 0) { + l = m; + } + else { + r = m; + } + } + + /* now that arr[(r-1)*len] < key <= arr[r*len] */ + return r; +} + +static void +npy_merge_left(char *p1, npy_intp l1, char *p2, npy_intp l2, char *p3, + size_t 
len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) +{ + char *end = p2 + l2 * len; + memcpy(p3, p1, sizeof(char) * l1 * len); + /* first element must be in p2 otherwise skipped in the caller */ + GENERIC_COPY(p1, p2, len); + p1 += len; + p2 += len; + + while (p1 < p2 && p2 < end) { + if (cmp(p2, p3, py_arr) < 0) { + GENERIC_COPY(p1, p2, len); + p1 += len; + p2 += len; + } + else { + GENERIC_COPY(p1, p3, len); + p1 += len; + p3 += len; + } + } + + if (p1 != p2) { + memcpy(p1, p3, sizeof(char) * (p2 - p1)); + } +} + +static void +npy_merge_right(char *p1, npy_intp l1, char *p2, npy_intp l2, char *p3, + size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) +{ + npy_intp ofs; + char *start = p1 - len; + memcpy(p3, p2, sizeof(char) * l2 * len); + p1 += (l1 - 1) * len; + p2 += (l2 - 1) * len; + p3 += (l2 - 1) * len; + /* first element must be in p1 otherwise skipped in the caller */ + GENERIC_COPY(p2, p1, len); + p2 -= len; + p1 -= len; + + while (p1 < p2 && start < p1) { + if (cmp(p3, p1, py_arr) < 0) { + GENERIC_COPY(p2, p1, len); + p2 -= len; + p1 -= len; + } + else { + GENERIC_COPY(p2, p3, len); + p2 -= len; + p3 -= len; + } + } + + if (p1 != p2) { + ofs = p2 - start; + memcpy(start + len, p3 - ofs + len, sizeof(char) * ofs); + } +} + +static int +npy_merge_at(char *arr, const run *stack, const npy_intp at, + buffer_char *buffer, size_t len, PyArray_CompareFunc *cmp, + PyArrayObject *py_arr) +{ + int ret; + npy_intp s1, l1, s2, l2, k; + char *p1, *p2; + s1 = stack[at].s; + l1 = stack[at].l; + s2 = stack[at + 1].s; + l2 = stack[at + 1].l; + /* arr[s2] belongs to arr[s1+k] */ + GENERIC_COPY(buffer->pw, arr + s2 * len, len); + k = npy_gallop_right(arr + s1 * len, l1, buffer->pw, len, cmp, py_arr); + + if (l1 == k) { + /* already sorted */ + return 0; + } + + p1 = arr + (s1 + k) * len; + l1 -= k; + p2 = arr + s2 * len; + /* arr[s2-1] belongs to arr[s2+l2] */ + GENERIC_COPY(buffer->pw, arr + (s2 - 1) * len, len); + l2 = npy_gallop_left(arr + s2 * len, l2, 
buffer->pw, len, cmp, py_arr); + + if (l2 < l1) { + ret = resize_buffer_char(buffer, l2); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + npy_merge_right(p1, l1, p2, l2, buffer->pw, len, cmp, py_arr); + } + else { + ret = resize_buffer_char(buffer, l1); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + npy_merge_left(p1, l1, p2, l2, buffer->pw, len, cmp, py_arr); + } + + return 0; +} + +static int +npy_try_collapse(char *arr, run *stack, npy_intp *stack_ptr, + buffer_char *buffer, size_t len, PyArray_CompareFunc *cmp, + PyArrayObject *py_arr) +{ + int ret; + npy_intp A, B, C, top; + top = *stack_ptr; + + while (1 < top) { + B = stack[top - 2].l; + C = stack[top - 1].l; + + if ((2 < top && stack[top - 3].l <= B + C) || + (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { + A = stack[top - 3].l; + + if (A <= C) { + ret = npy_merge_at(arr, stack, top - 3, buffer, len, cmp, + py_arr); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 3].l += B; + stack[top - 2] = stack[top - 1]; + --top; + } + else { + ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, + py_arr); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 2].l += C; + --top; + } + } + else if (1 < top && B <= C) { + ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 2].l += C; + --top; + } + else { + break; + } + } + + *stack_ptr = top; + return 0; +} + +static int +npy_force_collapse(char *arr, run *stack, npy_intp *stack_ptr, + buffer_char *buffer, size_t len, PyArray_CompareFunc *cmp, + PyArrayObject *py_arr) +{ + int ret; + npy_intp top = *stack_ptr; + + while (2 < top) { + if (stack[top - 3].l <= stack[top - 1].l) { + ret = npy_merge_at(arr, stack, top - 3, buffer, len, cmp, py_arr); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 3].l += stack[top - 2].l; + stack[top - 2] = stack[top - 1]; + --top; + } + else { + ret = npy_merge_at(arr, stack, top 
- 2, buffer, len, cmp, py_arr); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 2].l += stack[top - 1].l; + --top; + } + } + + if (1 < top) { + ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + } + + return 0; +} + +NPY_NO_EXPORT int +npy_timsort(void *start, npy_intp num, void *varr) +{ + PyArrayObject *arr = reinterpret_cast(varr); + size_t len = PyArray_ITEMSIZE(arr); + PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + int ret; + npy_intp l, n, stack_ptr, minrun; + run stack[RUN_STACK_SIZE]; + buffer_char buffer; + + /* Items that have zero size don't make sense to sort */ + if (len == 0) { + return 0; + } + + buffer.pw = NULL; + buffer.size = 0; + buffer.len = len; + stack_ptr = 0; + minrun = compute_min_run_short(num); + + /* used for insertion sort and gallop key */ + ret = resize_buffer_char(&buffer, len); + + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } + + for (l = 0; l < num;) { + n = npy_count_run((char *)start, l, num, minrun, buffer.pw, len, cmp, + arr); + + /* both s and l are scaled by len */ + stack[stack_ptr].s = l; + stack[stack_ptr].l = n; + ++stack_ptr; + ret = npy_try_collapse((char *)start, stack, &stack_ptr, &buffer, len, + cmp, arr); + + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } + + l += n; + } + + ret = npy_force_collapse((char *)start, stack, &stack_ptr, &buffer, len, + cmp, arr); + + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } + + ret = 0; + +cleanup: + if (buffer.pw != NULL) { + free(buffer.pw); + } + return ret; +} + +/* argsort */ + +static npy_intp +npy_acount_run(char *arr, npy_intp *tosort, npy_intp l, npy_intp num, + npy_intp minrun, size_t len, PyArray_CompareFunc *cmp, + PyArrayObject *py_arr) +{ + npy_intp sz; + npy_intp vi; + npy_intp *pl, *pi, *pj, *pr; + + if (NPY_UNLIKELY(num - l == 1)) { + return 1; + } + + pl = tosort + l; + + /* (not strictly) ascending sequence */ + if (cmp(arr + (*pl) 
* len, arr + (*(pl + 1)) * len, py_arr) <= 0) { + for (pi = pl + 1; + pi < tosort + num - 1 && + cmp(arr + (*pi) * len, arr + (*(pi + 1)) * len, py_arr) <= 0; + ++pi) { + } + } + else { /* (strictly) descending sequence */ + for (pi = pl + 1; + pi < tosort + num - 1 && + cmp(arr + (*(pi + 1)) * len, arr + (*pi) * len, py_arr) < 0; + ++pi) { + } + + for (pj = pl, pr = pi; pj < pr; ++pj, --pr) { + std::swap(*pj, *pr); + } + } + + ++pi; + sz = pi - pl; + + if (sz < minrun) { + if (l + minrun < num) { + sz = minrun; + } + else { + sz = num - l; + } + + pr = pl + sz; + + /* insertion sort */ + for (; pi < pr; ++pi) { + vi = *pi; + pj = pi; + + while (pl < pj && + cmp(arr + vi * len, arr + (*(pj - 1)) * len, py_arr) < 0) { + *pj = *(pj - 1); + --pj; + } + + *pj = vi; + } + } + + return sz; +} + +static npy_intp +npy_agallop_left(const char *arr, const npy_intp *tosort, const npy_intp size, + const char *key, size_t len, PyArray_CompareFunc *cmp, + PyArrayObject *py_arr) +{ + npy_intp last_ofs, ofs, l, m, r; + + if (cmp(arr + tosort[size - 1] * len, key, py_arr) < 0) { + return size; + } + + last_ofs = 0; + ofs = 1; + + for (;;) { + if (size <= ofs || ofs < 0) { + ofs = size; + break; + } + + if (cmp(arr + tosort[size - ofs - 1] * len, key, py_arr) < 0) { + break; + } + else { + last_ofs = ofs; + ofs = (ofs << 1) + 1; + } + } + + /* now that arr[tosort[size-ofs-1]*len] < key <= + * arr[tosort[size-last_ofs-1]*len] */ + l = size - ofs - 1; + r = size - last_ofs - 1; + + while (l + 1 < r) { + m = l + ((r - l) >> 1); + + if (cmp(arr + tosort[m] * len, key, py_arr) < 0) { + l = m; + } + else { + r = m; + } + } + + /* now that arr[tosort[r-1]*len] < key <= arr[tosort[r]*len] */ + return r; +} + +static npy_intp +npy_agallop_right(const char *arr, const npy_intp *tosort, const npy_intp size, + const char *key, size_t len, PyArray_CompareFunc *cmp, + PyArrayObject *py_arr) +{ + npy_intp last_ofs, ofs, m; + + if (cmp(key, arr + tosort[0] * len, py_arr) < 0) { + return 0; + } + + 
last_ofs = 0; + ofs = 1; + + for (;;) { + if (size <= ofs || ofs < 0) { + ofs = size; /* arr[ofs] is never accessed */ + break; + } + + if (cmp(key, arr + tosort[ofs] * len, py_arr) < 0) { + break; + } + else { + last_ofs = ofs; + /* ofs = 1, 3, 7, 15... */ + ofs = (ofs << 1) + 1; + } + } + + /* now that arr[tosort[last_ofs]*len] <= key < arr[tosort[ofs]*len] */ + while (last_ofs + 1 < ofs) { + m = last_ofs + ((ofs - last_ofs) >> 1); + + if (cmp(key, arr + tosort[m] * len, py_arr) < 0) { + ofs = m; + } + else { + last_ofs = m; + } + } + + /* now that arr[tosort[ofs-1]*len] <= key < arr[tosort[ofs]*len] */ + return ofs; +} + +static void +npy_amerge_left(char *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, + npy_intp l2, npy_intp *p3, size_t len, + PyArray_CompareFunc *cmp, PyArrayObject *py_arr) +{ + npy_intp *end = p2 + l2; + memcpy(p3, p1, sizeof(npy_intp) * l1); + /* first element must be in p2 otherwise skipped in the caller */ + *p1++ = *p2++; + + while (p1 < p2 && p2 < end) { + if (cmp(arr + (*p2) * len, arr + (*p3) * len, py_arr) < 0) { + *p1++ = *p2++; + } + else { + *p1++ = *p3++; + } + } + + if (p1 != p2) { + memcpy(p1, p3, sizeof(npy_intp) * (p2 - p1)); + } +} + +static void +npy_amerge_right(char *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, + npy_intp l2, npy_intp *p3, size_t len, + PyArray_CompareFunc *cmp, PyArrayObject *py_arr) +{ + npy_intp ofs; + npy_intp *start = p1 - 1; + memcpy(p3, p2, sizeof(npy_intp) * l2); + p1 += l1 - 1; + p2 += l2 - 1; + p3 += l2 - 1; + /* first element must be in p1 otherwise skipped in the caller */ + *p2-- = *p1--; + + while (p1 < p2 && start < p1) { + if (cmp(arr + (*p3) * len, arr + (*p1) * len, py_arr) < 0) { + *p2-- = *p1--; + } + else { + *p2-- = *p3--; + } + } + + if (p1 != p2) { + ofs = p2 - start; + memcpy(start + 1, p3 - ofs + 1, sizeof(npy_intp) * ofs); + } +} + +static int +npy_amerge_at(char *arr, npy_intp *tosort, const run *stack, const npy_intp at, + buffer_intp *buffer, size_t len, PyArray_CompareFunc 
*cmp, + PyArrayObject *py_arr) +{ + int ret; + npy_intp s1, l1, s2, l2, k; + npy_intp *p1, *p2; + s1 = stack[at].s; + l1 = stack[at].l; + s2 = stack[at + 1].s; + l2 = stack[at + 1].l; + /* tosort[s2] belongs to tosort[s1+k] */ + k = npy_agallop_right(arr, tosort + s1, l1, arr + tosort[s2] * len, len, + cmp, py_arr); + + if (l1 == k) { + /* already sorted */ + return 0; + } + + p1 = tosort + s1 + k; + l1 -= k; + p2 = tosort + s2; + /* tosort[s2-1] belongs to tosort[s2+l2] */ + l2 = npy_agallop_left(arr, tosort + s2, l2, arr + tosort[s2 - 1] * len, + len, cmp, py_arr); + + if (l2 < l1) { + ret = resize_buffer_intp(buffer, l2); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + npy_amerge_right(arr, p1, l1, p2, l2, buffer->pw, len, cmp, py_arr); + } + else { + ret = resize_buffer_intp(buffer, l1); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + npy_amerge_left(arr, p1, l1, p2, l2, buffer->pw, len, cmp, py_arr); + } + + return 0; +} + +static int +npy_atry_collapse(char *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, + buffer_intp *buffer, size_t len, PyArray_CompareFunc *cmp, + PyArrayObject *py_arr) +{ + int ret; + npy_intp A, B, C, top; + top = *stack_ptr; + + while (1 < top) { + B = stack[top - 2].l; + C = stack[top - 1].l; + + if ((2 < top && stack[top - 3].l <= B + C) || + (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { + A = stack[top - 3].l; + + if (A <= C) { + ret = npy_amerge_at(arr, tosort, stack, top - 3, buffer, len, + cmp, py_arr); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 3].l += B; + stack[top - 2] = stack[top - 1]; + --top; + } + else { + ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, + cmp, py_arr); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 2].l += C; + --top; + } + } + else if (1 < top && B <= C) { + ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, + py_arr); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 2].l += C; + 
--top; + } + else { + break; + } + } + + *stack_ptr = top; + return 0; +} + +static int +npy_aforce_collapse(char *arr, npy_intp *tosort, run *stack, + npy_intp *stack_ptr, buffer_intp *buffer, size_t len, + PyArray_CompareFunc *cmp, PyArrayObject *py_arr) +{ + int ret; + npy_intp top = *stack_ptr; + + while (2 < top) { + if (stack[top - 3].l <= stack[top - 1].l) { + ret = npy_amerge_at(arr, tosort, stack, top - 3, buffer, len, cmp, + py_arr); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 3].l += stack[top - 2].l; + stack[top - 2] = stack[top - 1]; + --top; + } + else { + ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, + py_arr); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + + stack[top - 2].l += stack[top - 1].l; + --top; + } + } + + if (1 < top) { + ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, + py_arr); + + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } + } + + return 0; +} + +NPY_NO_EXPORT int +npy_atimsort(void *start, npy_intp *tosort, npy_intp num, void *varr) +{ + PyArrayObject *arr = reinterpret_cast(varr); + size_t len = PyArray_ITEMSIZE(arr); + PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + int ret; + npy_intp l, n, stack_ptr, minrun; + run stack[RUN_STACK_SIZE]; + buffer_intp buffer; + + /* Items that have zero size don't make sense to sort */ + if (len == 0) { + return 0; + } + + buffer.pw = NULL; + buffer.size = 0; + stack_ptr = 0; + minrun = compute_min_run_short(num); + + for (l = 0; l < num;) { + n = npy_acount_run((char *)start, tosort, l, num, minrun, len, cmp, + arr); + /* both s and l are scaled by len */ + stack[stack_ptr].s = l; + stack[stack_ptr].l = n; + ++stack_ptr; + ret = npy_atry_collapse((char *)start, tosort, stack, &stack_ptr, + &buffer, len, cmp, arr); + + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } + + l += n; + } + + ret = npy_aforce_collapse((char *)start, tosort, stack, &stack_ptr, + &buffer, len, cmp, arr); + + if 
(NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } + + ret = 0; + +cleanup: + if (buffer.pw != NULL) { + free(buffer.pw); + } + return ret; +} diff --git a/numpy/_core/src/npysort/x86_simd_argsort.dispatch.cpp b/numpy/_core/src/npysort/x86_simd_argsort.dispatch.cpp index 04bb03532719..9bfbee80749f 100644 --- a/numpy/_core/src/npysort/x86_simd_argsort.dispatch.cpp +++ b/numpy/_core/src/npysort/x86_simd_argsort.dispatch.cpp @@ -1,6 +1,7 @@ #include "x86_simd_qsort.hpp" #ifndef __CYGWIN__ +#include #include "x86-simd-sort/src/x86simdsort-static-incl.h" #define DISPATCH_ARG_METHODS(TYPE) \ @@ -8,8 +9,9 @@ template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(TYPE* arr, npy_intp* arg, npy { \ x86simdsortStatic::argselect(arr, reinterpret_cast(arg), kth, num, true); \ } \ -template<> void NPY_CPU_DISPATCH_CURFX(ArgQSort)(TYPE* arr, npy_intp *arg, npy_intp size) \ +template<> void NPY_CPU_DISPATCH_CURFX(ArgQSort)(TYPE* arr, npy_intp *arg, npy_intp size, bool reverse) \ { \ + assert(!reverse); (void)reverse; \ x86simdsortStatic::argsort(arr, reinterpret_cast(arg), size, true); \ } \ diff --git a/numpy/_core/src/npysort/x86_simd_qsort.dispatch.cpp b/numpy/_core/src/npysort/x86_simd_qsort.dispatch.cpp index c4505f058857..1d20c152b48b 100644 --- a/numpy/_core/src/npysort/x86_simd_qsort.dispatch.cpp +++ b/numpy/_core/src/npysort/x86_simd_qsort.dispatch.cpp @@ -1,6 +1,7 @@ #include "x86_simd_qsort.hpp" #ifndef __CYGWIN__ +#include #include "x86-simd-sort/src/x86simdsort-static-incl.h" #define DISPATCH_SORT_METHODS(TYPE) \ @@ -8,8 +9,9 @@ template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(TYPE *arr, npy_intp num, npy_int { \ x86simdsortStatic::qselect(arr, kth, num, true); \ } \ -template<> void NPY_CPU_DISPATCH_CURFX(QSort)(TYPE *arr, npy_intp num) \ +template<> void NPY_CPU_DISPATCH_CURFX(QSort)(TYPE *arr, npy_intp num, bool reverse) \ { \ + assert(!reverse); (void)reverse; \ x86simdsortStatic::qsort(arr, num, true); \ } \ diff --git a/numpy/_core/src/npysort/x86_simd_qsort.hpp 
b/numpy/_core/src/npysort/x86_simd_qsort.hpp index e12385689deb..c49caaca5294 100644 --- a/numpy/_core/src/npysort/x86_simd_qsort.hpp +++ b/numpy/_core/src/npysort/x86_simd_qsort.hpp @@ -6,15 +6,18 @@ namespace np { namespace qsort_simd { #include "x86_simd_qsort.dispatch.h" -NPY_CPU_DISPATCH_DECLARE(template void QSort, (T *arr, npy_intp size)) +// x86 SIMD sort is ascending-only; ``reverse`` must be false (asserted in +// the implementation). The parameter exists purely so the dispatcher can +// share one function-pointer signature with the Highway path. +NPY_CPU_DISPATCH_DECLARE(template void QSort, (T *arr, npy_intp size, bool reverse)) NPY_CPU_DISPATCH_DECLARE(template void QSelect, (T* arr, npy_intp num, npy_intp kth)) #include "x86_simd_argsort.dispatch.h" -NPY_CPU_DISPATCH_DECLARE(template void ArgQSort, (T *arr, npy_intp* arg, npy_intp size)) +NPY_CPU_DISPATCH_DECLARE(template void ArgQSort, (T *arr, npy_intp* arg, npy_intp size, bool reverse)) NPY_CPU_DISPATCH_DECLARE(template void ArgQSelect, (T *arr, npy_intp* arg, npy_intp kth, npy_intp size)) #include "x86_simd_qsort_16bit.dispatch.h" -NPY_CPU_DISPATCH_DECLARE(template void QSort, (T *arr, npy_intp size)) +NPY_CPU_DISPATCH_DECLARE(template void QSort, (T *arr, npy_intp size, bool reverse)) NPY_CPU_DISPATCH_DECLARE(template void QSelect, (T* arr, npy_intp num, npy_intp kth)) } } // np::qsort_simd diff --git a/numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp b/numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp index 063e713c5256..df4a12add727 100644 --- a/numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp +++ b/numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp @@ -1,6 +1,7 @@ #include "x86_simd_qsort.hpp" #ifndef __CYGWIN__ +#include #include "x86-simd-sort/src/x86simdsort-static-incl.h" /* * MSVC doesn't set the macro __AVX512VBMI2__ which is required for the 16-bit @@ -37,20 +38,23 @@ template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(int16_t *arr, npy_intp num, npy_ /* * 
QSort dispatch functions: */ -template<> void NPY_CPU_DISPATCH_CURFX(QSort)(Half *arr, npy_intp size) +template<> void NPY_CPU_DISPATCH_CURFX(QSort)(Half *arr, npy_intp size, bool reverse) { + assert(!reverse); (void)reverse; #if defined(NPY_HAVE_AVX512_SPR) x86simdsortStatic::qsort(reinterpret_cast<_Float16*>(arr), size, true); #else avx512_qsort_fp16(reinterpret_cast(arr), size, true, false); #endif } -template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint16_t *arr, npy_intp size) +template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint16_t *arr, npy_intp size, bool reverse) { + assert(!reverse); (void)reverse; x86simdsortStatic::qsort(arr, size); } -template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int16_t *arr, npy_intp size) +template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int16_t *arr, npy_intp size, bool reverse) { + assert(!reverse); (void)reverse; x86simdsortStatic::qsort(arr, size); } diff --git a/numpy/_core/src/umath/_scaled_float_dtype.c b/numpy/_core/src/umath/_scaled_float_dtype.c index d26cd9b9b7ca..fb65a404563d 100644 --- a/numpy/_core/src/umath/_scaled_float_dtype.c +++ b/numpy/_core/src/umath/_scaled_float_dtype.c @@ -777,6 +777,21 @@ promote_to_sfloat(PyUFuncObject *NPY_UNUSED(ufunc), } +static inline int +cmp(const void *av, const void *bv, void *NPY_UNUSED(arr)) +{ + npy_float64 a = *(const npy_float64 *)av; + npy_float64 b = *(const npy_float64 *)bv; + if (a < b || (b != b && a == a)) { + return -1; + } + if (a > b || (a != a && b == b)) { + return 1; + } + return 0; +} + + NPY_NO_EXPORT int sfloat_stable_sort_loop( PyArrayMethod_Context *context, @@ -792,7 +807,7 @@ sfloat_stable_sort_loop( npy_intp N = dimensions[0]; char *in = data[0]; - return timsort_double(in, N, NULL); + return npy_mergesort_impl(in, N, NULL, strides[0], cmp); } @@ -811,7 +826,7 @@ sfloat_default_sort_loop( npy_intp N = dimensions[0]; char *in = data[0]; - return quicksort_double(in, N, NULL); + return npy_quicksort_impl(in, N, NULL, strides[0], cmp); } @@ -880,7 +895,7 @@ 
sfloat_stable_argsort_loop( char *in = data[0]; npy_intp *out = (npy_intp *)data[1]; - return atimsort_double(in, out, N, NULL); + return npy_amergesort_impl(in, out, N, NULL, strides[0], cmp); } @@ -900,7 +915,7 @@ sfloat_default_argsort_loop( char *in = data[0]; npy_intp *out = (npy_intp *)data[1]; - return aquicksort_double(in, out, N, NULL); + return npy_aquicksort_impl(in, out, N, NULL, strides[0], cmp); } diff --git a/numpy/_core/tests/test_multiarray.py b/numpy/_core/tests/test_multiarray.py index 26c200971daf..6d936a9d0fdd 100644 --- a/numpy/_core/tests/test_multiarray.py +++ b/numpy/_core/tests/test_multiarray.py @@ -2747,6 +2747,277 @@ def test_sort_unicode_kind(self): assert_raises(ValueError, d.sort, kind=k) assert_raises(ValueError, d.argsort, kind=k) + def _test_sort_descending_nonan(self, a, stable, descending): + if not descending: + a = a[::-1] + b = a[::-1].copy() + + msg = f"sort, descending={descending}, stable={stable}" + a_sorted = np.sort(a, stable=stable, descending=descending, axis=-1) + assert_equal(a_sorted, b, msg) + + # randomized input + a_randomized = a.copy() + rng = np.random.default_rng(0) + rng.shuffle(a_randomized) + + msg = f"sort, randomized, descending={descending}, stable={stable}" + a_sorted = np.sort(a_randomized, stable=stable, descending=descending, axis=-1) + assert_equal(a_sorted, b, msg) + + def _test_sort_descending_nan(self, a, stable, descending): + if not descending: + a = a[::-1] + # add nans to check that they are sorted to the end + if np.issubdtype(a.dtype, np.complexfloating): + nan = np.nan + 1j * np.nan + elif np.issubdtype(a.dtype, np.floating): + nan = np.nan + elif np.issubdtype(a.dtype, np.datetime64): + nan = np.datetime64('NaT', 'D') + elif np.issubdtype(a.dtype, np.timedelta64): + nan = np.timedelta64('NaT', 'D') + a[::10] = nan + + b = a[::-1].copy() + b = np.concatenate((b[~np.isnan(b)], b[np.isnan(b)])) + + msg = f"sort, descending={descending}, stable={stable}" + a_sorted = np.sort(a, 
stable=stable, descending=descending, axis=-1) + assert_equal(a_sorted, b, msg) + + # randomized input + a_randomized = a.copy() + rng = np.random.default_rng(0) + rng.shuffle(a_randomized) + + msg = f"sort, randomized, descending={descending}, stable={stable}" + a_sorted = np.sort(a_randomized, stable=stable, descending=descending, axis=-1) + assert_equal(a_sorted, b, msg) + + @pytest.mark.parametrize('dtype', [np.int8, np.int16, np.int32, np.int64]) + @pytest.mark.parametrize('stable', [True, False]) + @pytest.mark.parametrize('descending', [True, False]) + def test_sort_descending_signed(self, dtype, stable, descending): + a = np.arange(-51, 50, dtype=dtype) + self._test_sort_descending_nonan(a, stable, descending) + + @pytest.mark.parametrize('dtype', [np.uint8, np.uint16, np.uint32, np.uint64]) + @pytest.mark.parametrize('stable', [True, False]) + @pytest.mark.parametrize('descending', [True, False]) + def test_sort_descending_unsigned(self, dtype, stable, descending): + a = np.arange(0, 101, dtype=dtype) + self._test_sort_descending_nonan(a, stable, descending) + + @pytest.mark.parametrize( + "dtype", [np.float16, np.float32, np.float64, np.longdouble] + ) + @pytest.mark.parametrize("stable", [True, False]) + @pytest.mark.parametrize("descending", [True, False]) + def test_sort_descending_floats(self, dtype, stable, descending): + a = np.linspace(-50, 50, 101, dtype=dtype) + self._test_sort_descending_nonan(a, stable, descending) + self._test_sort_descending_nan(a, stable, descending) + + @pytest.mark.parametrize("dtype", [np.complex64, np.complex128, np.clongdouble]) + @pytest.mark.parametrize("stable", [True, False]) + @pytest.mark.parametrize("descending", [True, False]) + def test_sort_descending_complex(self, dtype, stable, descending): + a = np.arange(-50, 51, dtype=dtype) + 1j * np.arange(-50, 51, dtype=dtype) + self._test_sort_descending_nonan(a, stable, descending) + self._test_sort_descending_nan(a, stable, descending) + + 
@pytest.mark.parametrize("dtype", [np.complex64, np.complex128, np.clongdouble]) + @pytest.mark.parametrize("stable", [True, False]) + @pytest.mark.parametrize("descending", [True, False]) + @pytest.mark.parametrize("random_seed", [0, 1]) + def test_sort_descending_complex_lexorder(self, dtype, + stable, descending, random_seed): + arange = np.tile(np.arange(25, dtype=dtype), 4) + nans = np.full(100, np.nan + 1j * np.nan, dtype=dtype) + + no_nans = arange + 1j * arange + im_nans = arange + 1j * nans + re_nans = nans + 1j * arange + all_nans = nans + 1j * nans + + a = np.concatenate((no_nans, im_nans, re_nans, all_nans)) + immask = np.isnan(a.imag) + remask = np.isnan(a.real) + + rng = np.random.default_rng(random_seed) + rng.shuffle(a) + + # check that nans are sorted to the end with lexicographic ordering + # no nans -> imaginary nans only -> real nans only -> all nans + a.sort(stable=stable, descending=descending, axis=-1) + immask_sorted = np.isnan(a.imag) + remask_sorted = np.isnan(a.real) + + assert_equal( + immask_sorted, + immask, + f"imag nans mask, dtype={dtype}, stable={stable}, descending={descending}", + ) + assert_equal( + remask_sorted, + remask, + f"real nans mask, dtype={dtype}, stable={stable}, descending={descending}", + ) + + # check lexicographic ordering (real part is more significant) + # for no-nan values only + no_nans = a[~immask_sorted & ~remask_sorted] + + real_diff = np.diff(no_nans.real) + imag_diff = np.diff(no_nans.imag) + + if descending: + real_diff = -real_diff + imag_diff = -imag_diff + + assert_equal( + (real_diff > 0) | ((real_diff == 0) & (imag_diff >= 0)), + True, + f"lexicographic order, dtype={dtype}, stable={stable}, " + f"descending={descending}", + ) + + @pytest.mark.parametrize('dtype', ['datetime64[D]', 'timedelta64[D]']) + @pytest.mark.parametrize('stable', [True, False]) + @pytest.mark.parametrize('descending', [True, False]) + def test_sort_descending_datetime(self, dtype, stable, descending): + a = np.arange(0, 101, 
dtype=dtype) + self._test_sort_descending_nonan(a, stable, descending) + self._test_sort_descending_nan(a, stable, descending) + + @pytest.mark.parametrize('dtype', [np.str_, np.bytes_]) + @pytest.mark.parametrize('stable', [True, False]) + @pytest.mark.parametrize('descending', [True, False]) + def test_sort_descending_string(self, dtype, stable, descending): + a = np.array([f"{i:03d}" for i in range(101)], dtype=dtype) + self._test_sort_descending_nonan(a, stable, descending) + + def _test_argsort_descending_nonan(self, a, stable, descending): + expected = np.arange(len(a))[::-1] + if not descending: + expected = expected[::-1] + + idx = np.argsort(a, stable=stable, descending=descending, axis=-1) + msg = f"argsort, dtype={a.dtype}, stable={stable}, descending={descending}" + assert_equal(idx, expected, msg) + + rng = np.random.default_rng(0) + perm = rng.permutation(len(a)) + a_randomized = a[perm] + inverse_perm = np.empty_like(perm) + inverse_perm[perm] = np.arange(len(perm)) + expected = inverse_perm[expected] + + msg = ( + f"argsort, randomized, dtype={a.dtype}, stable={stable}, " + f"descending={descending}" + ) + idx = np.argsort(a_randomized, stable=stable, descending=descending, axis=-1) + assert_equal(idx, expected, msg) + + def _test_argsort_descending_nan(self, a, stable, descending): + if np.issubdtype(a.dtype, np.complexfloating): + nan = np.nan + 1j * np.nan + elif np.issubdtype(a.dtype, np.floating): + nan = np.nan + elif np.issubdtype(a.dtype, np.datetime64): + nan = np.datetime64("NaT", "D") + elif np.issubdtype(a.dtype, np.timedelta64): + nan = np.timedelta64("NaT", "D") + a[::10] = nan + + # comparing datetime types directly to numerical zero fails + zero = 0 + if ( + np.issubdtype(a.dtype, np.datetime64) or + np.issubdtype(a.dtype, np.timedelta64) + ): + zero = np.timedelta64(0, "D") + + idx = np.argsort(a, stable=stable, descending=descending, axis=-1) + sorted_a = a[idx] + diff_sorted_a = np.diff(sorted_a[:-11]) + if descending: + 
diff_sorted_a = -diff_sorted_a + + msg = f"argsort, dtype={a.dtype}, stable={stable}, descending={descending}" + assert_equal(np.isnan(sorted_a[-11:]), True, msg) # nans at end + + if not np.issubdtype(a.dtype, np.datetime64) and not np.issubdtype( + a.dtype, np.timedelta64 + ): + assert_equal(diff_sorted_a >= zero, True, msg) + + rng = np.random.default_rng(0) + perm = rng.permutation(len(a)) + a_randomized = a[perm] + + idx = np.argsort(a_randomized, stable=stable, descending=descending, axis=-1) + sorted_a = a_randomized[idx] + diff_sorted_a = np.diff(sorted_a[:-11]) + if descending: + diff_sorted_a = -diff_sorted_a + + msg = ( + f"argsort, randomized, dtype={a.dtype}, stable={stable}, " + f"descending={descending}" + ) + assert_equal(np.isnan(sorted_a[-11:]), True, msg) + assert_equal(diff_sorted_a >= zero, True, msg) + + @pytest.mark.parametrize("dtype", [np.int8, np.int16, np.int32, np.int64]) + @pytest.mark.parametrize("stable", [True, False]) + @pytest.mark.parametrize("descending", [True, False]) + def test_argsort_descending_signed(self, dtype, stable, descending): + a = np.arange(-51, 50, dtype=dtype) + self._test_argsort_descending_nonan(a, stable, descending) + + @pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64]) + @pytest.mark.parametrize("stable", [True, False]) + @pytest.mark.parametrize("descending", [True, False]) + def test_argsort_descending_unsigned(self, dtype, stable, descending): + a = np.arange(0, 101, dtype=dtype) + self._test_argsort_descending_nonan(a, stable, descending) + + @pytest.mark.parametrize( + "dtype", [np.float16, np.float32, np.float64, np.longdouble] + ) + @pytest.mark.parametrize("stable", [True, False]) + @pytest.mark.parametrize("descending", [True, False]) + def test_argsort_descending_floats(self, dtype, stable, descending): + a = np.linspace(-50, 50, 101, dtype=dtype) + self._test_argsort_descending_nonan(a, stable, descending) + self._test_argsort_descending_nan(a, stable, descending) + + 
@pytest.mark.parametrize("dtype", [np.complex64, np.complex128, np.clongdouble]) + @pytest.mark.parametrize("stable", [True, False]) + @pytest.mark.parametrize("descending", [True, False]) + def test_argsort_descending_complex(self, dtype, stable, descending): + a = np.arange(-50, 51, dtype=dtype) + 1j * np.arange(-50, 51, dtype=dtype) + self._test_argsort_descending_nonan(a, stable, descending) + self._test_argsort_descending_nan(a, stable, descending) + + @pytest.mark.parametrize("dtype", ["datetime64[D]", "timedelta64[D]"]) + @pytest.mark.parametrize("stable", [True, False]) + @pytest.mark.parametrize("descending", [True, False]) + def test_argsort_descending_datetime(self, dtype, stable, descending): + a = np.arange(0, 101, dtype=dtype) + self._test_argsort_descending_nonan(a, stable, descending) + self._test_argsort_descending_nan(a, stable, descending) + + @pytest.mark.parametrize("dtype", [np.str_, np.bytes_]) + @pytest.mark.parametrize("stable", [True, False]) + @pytest.mark.parametrize("descending", [True, False]) + def test_argsort_descending_string(self, dtype, stable, descending): + a = np.array([f"{i:03d}" for i in range(101)], dtype=dtype) + self._test_argsort_descending_nonan(a, stable, descending) + @pytest.mark.parametrize('a', [ np.array([0, 1, np.nan], dtype=np.float16), np.array([0, 1, np.nan], dtype=np.float32), diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 7b39da8d9461..c0a856102d4f 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -5605,7 +5605,7 @@ def round(self, decimals=0, out=None): return out def argsort(self, axis=np._NoValue, kind=None, order=None, endwith=True, - fill_value=None, *, stable=False): + fill_value=None, *, stable=False, descending=False): """ Return an ndarray of indices that sort the array along the specified axis. Masked values are filled beforehand to @@ -5633,6 +5633,8 @@ def argsort(self, axis=np._NoValue, kind=None, order=None, endwith=True, If ``fill_value`` is not None, it supersedes ``endwith``. 
stable : bool, optional Only for compatibility with ``np.argsort``. Ignored. + descending : bool, optional + Only for compatibility with ``np.sort``. Ignored. Returns ------- @@ -5667,6 +5669,11 @@ def argsort(self, axis=np._NoValue, kind=None, order=None, endwith=True, "`stable` parameter is not supported for masked arrays." ) + if descending: + raise ValueError( + "`descending` parameter is not supported for masked arrays." + ) + # 2017-04-11, Numpy 1.13.0, gh-8701: warn on axis default if axis is np._NoValue: axis = _deprecate_argsort_axis(self) @@ -5773,7 +5780,7 @@ def argmax(self, axis=None, fill_value=None, out=None, *, return d.argmax(axis, out=out, keepdims=keepdims) def sort(self, axis=-1, kind=None, order=None, endwith=True, - fill_value=None, *, stable=False): + fill_value=None, *, stable=False, descending=False): """ Sort the array, in-place @@ -5801,6 +5808,8 @@ def sort(self, axis=-1, kind=None, order=None, endwith=True, If ``fill_value`` is not None, it supersedes ``endwith``. stable : bool, optional Only for compatibility with ``np.sort``. Ignored. + descending : bool, optional + Only for compatibility with ``np.sort``. Ignored. See Also -------- @@ -5846,6 +5855,11 @@ def sort(self, axis=-1, kind=None, order=None, endwith=True, "`stable` parameter is not supported for masked arrays." ) + if descending: + raise ValueError( + "`descending` parameter is not supported for masked arrays." + ) + if self._mask is nomask: ndarray.sort(self, axis=axis, kind=kind, order=order) return @@ -7197,7 +7211,7 @@ def power(a, b, third=None): def argsort(a, axis=np._NoValue, kind=None, order=None, endwith=True, - fill_value=None, *, stable=None): + fill_value=None, *, stable=None, descending=None): "Function version of the eponymous method." 
a = np.asanyarray(a) @@ -7207,15 +7221,16 @@ def argsort(a, axis=np._NoValue, kind=None, order=None, endwith=True, if isinstance(a, MaskedArray): return a.argsort(axis=axis, kind=kind, order=order, endwith=endwith, - fill_value=fill_value, stable=None) + fill_value=fill_value, stable=stable, descending=descending) else: - return a.argsort(axis=axis, kind=kind, order=order, stable=None) + return a.argsort(axis=axis, kind=kind, order=order, stable=stable, + descending=descending) argsort.__doc__ = MaskedArray.argsort.__doc__ def sort(a, axis=-1, kind=None, order=None, endwith=True, fill_value=None, *, - stable=None): + stable=None, descending=None): """ Return a sorted copy of the masked array. @@ -7251,9 +7266,9 @@ def sort(a, axis=-1, kind=None, order=None, endwith=True, fill_value=None, *, if isinstance(a, MaskedArray): a.sort(axis=axis, kind=kind, order=order, endwith=endwith, - fill_value=fill_value, stable=stable) + fill_value=fill_value, stable=stable, descending=descending) else: - a.sort(axis=axis, kind=kind, order=order, stable=stable) + a.sort(axis=axis, kind=kind, order=order, stable=stable, descending=descending) return a diff --git a/numpy/ma/core.pyi b/numpy/ma/core.pyi index 9f7d97c6374e..75c20fcadd64 100644 --- a/numpy/ma/core.pyi +++ b/numpy/ma/core.pyi @@ -2370,6 +2370,7 @@ class MaskedArray(ndarray[_ShapeT_co, _DTypeT_co]): fill_value: _ScalarLike_co | None = None, *, stable: bool = False, + descending: bool = False, ) -> _MaskedArray[intp]: ... # keep in sync with `MaskedArray.argmin` (below) and `ndarray.argmax` @@ -2478,6 +2479,7 @@ class MaskedArray(ndarray[_ShapeT_co, _DTypeT_co]): fill_value: _ScalarLike_co | None = None, *, stable: Literal[False] | None = False, + descending: Literal[False] | None = False, ) -> None: ... # @@ -3640,6 +3642,7 @@ def argsort( fill_value: _ScalarLike_co | None = None, *, stable: bool | None = None, + descending: bool | None = None, ) -> _Array1D[np.intp]: ... 
@overload # MaskedArray, axis: None def argsort( @@ -3651,6 +3654,7 @@ def argsort( fill_value: _ScalarLike_co | None = None, *, stable: bool | None = None, + descending: bool | None = None, ) -> _Masked1D[np.intp]: ... @overload # MaskedArray, axis: int-like def argsort( @@ -3662,6 +3666,7 @@ def argsort( fill_value: _ScalarLike_co | None = None, *, stable: bool | None = None, + descending: bool | None = None, ) -> _MaskedArray[np.intp]: ... @overload # array-like, axis: None def argsort( @@ -3673,6 +3678,7 @@ def argsort( fill_value: _ScalarLike_co | None = None, *, stable: bool | None = None, + descending: bool | None = None, ) -> _Array1D[np.intp]: ... @overload # array-like, axis: int-like def argsort( @@ -3684,6 +3690,7 @@ def argsort( fill_value: _ScalarLike_co | None = None, *, stable: bool | None = None, + descending: bool | None = None, ) -> NDArray[np.intp]: ... # @@ -3697,6 +3704,7 @@ def sort[ArrayT: np.ndarray]( fill_value: _ScalarLike_co | None = None, *, stable: Literal[False] | None = None, + descending: Literal[False] | None = False, ) -> ArrayT: ... @overload def sort( @@ -3708,6 +3716,7 @@ def sort( fill_value: _ScalarLike_co | None = None, *, stable: Literal[False] | None = None, + descending: Literal[False] | None = False, ) -> NDArray[Any]: ... # diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index df26ce13c5fa..a05521767441 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -3844,6 +3844,30 @@ def test_argsort(self): a = array([1, 5, 2, 4, 3], mask=[1, 0, 0, 1, 0]) assert_equal(np.argsort(a), argsort(a)) + def test_sort_stable_or_descending_throws(self): + a = array([1, 5, 2, 4, 3], mask=[1, 0, 0, 1, 0]) + with pytest.raises( + ValueError, match="`stable` parameter is not supported for masked arrays." 
+ ): + sort(a, stable=True) + with pytest.raises( + ValueError, + match="`descending` parameter is not supported for masked arrays.", + ): + sort(a, descending=True) + + def test_argsort_stable_or_descending_throws(self): + a = array([1, 5, 2, 4, 3], mask=[1, 0, 0, 1, 0]) + with pytest.raises( + ValueError, match="`stable` parameter is not supported for masked arrays." + ): + argsort(a, stable=True) + with pytest.raises( + ValueError, + match="`descending` parameter is not supported for masked arrays.", + ): + argsort(a, descending=True) + def test_squeeze(self): # Check squeeze data = masked_array([[1, 2, 3]])