@@ -74,18 +74,6 @@ struct vfloat8
7474 m = _mm256_set1_ps (a);
7575 }
7676
77- /* *
78- * @brief Construct from 8 scalar values.
79- *
80- * The value of @c a is stored to lane 0 (LSB) in the SIMD register.
81- */
82- ASTCENC_SIMD_INLINE explicit vfloat8 (
83- float a, float b, float c, float d,
84- float e, float f, float g, float h)
85- {
86- m = _mm256_set_ps (h, g, f, e, d, c, b, a);
87- }
88-
8977 /* *
9078 * @brief Construct from an existing SIMD register.
9179 */
@@ -94,20 +82,6 @@ struct vfloat8
9482 m = a;
9583 }
9684
97- /* *
98- * @brief Get the scalar value of a single lane.
99- */
100- template <int l> ASTCENC_SIMD_INLINE float lane () const
101- {
102- #if !defined(__clang__) && defined(_MSC_VER)
103- return m.m256_f32 [l];
104- #else
105- union { __m256 m; float f[8 ]; } cvt;
106- cvt.m = m;
107- return cvt.f [l];
108- #endif
109- }
110-
11185 /* *
11286 * @brief Factory that returns a vector of zeros.
11387 */
@@ -132,14 +106,6 @@ struct vfloat8
132106 return vfloat8 (_mm256_load_ps (p));
133107 }
134108
135- /* *
136- * @brief Factory that returns a vector containing the lane IDs.
137- */
138- static ASTCENC_SIMD_INLINE vfloat8 lane_id ()
139- {
140- return vfloat8 (_mm256_set_ps (7 , 6 , 5 , 4 , 3 , 2 , 1 , 0 ));
141- }
142-
143109 /* *
144110 * @brief The vector ...
145111 */
@@ -190,18 +156,6 @@ struct vint8
190156 m = _mm256_set1_epi32 (a);
191157 }
192158
193- /* *
194- * @brief Construct from 8 scalar values.
195- *
196- * The value of @c a is stored to lane 0 (LSB) in the SIMD register.
197- */
198- ASTCENC_SIMD_INLINE explicit vint8 (
199- int a, int b, int c, int d,
200- int e, int f, int g, int h)
201- {
202- m = _mm256_set_epi32 (h, g, f, e, d, c, b, a);
203- }
204-
205159 /* *
206160 * @brief Construct from an existing SIMD register.
207161 */
@@ -210,20 +164,6 @@ struct vint8
210164 m = a;
211165 }
212166
213- /* *
214- * @brief Get the scalar from a single lane.
215- */
216- template <int l> ASTCENC_SIMD_INLINE int lane () const
217- {
218- #if !defined(__clang__) && defined(_MSC_VER)
219- return m.m256i_i32 [l];
220- #else
221- union { __m256i m; int f[8 ]; } cvt;
222- cvt.m = m;
223- return cvt.f [l];
224- #endif
225- }
226-
227167 /* *
228168 * @brief Factory that returns a vector of zeros.
229169 */
@@ -528,6 +468,14 @@ ASTCENC_SIMD_INLINE vint8 hmin(vint8 a)
528468 return vmin;
529469}
530470
471+ /* *
472+ * @brief Return the horizontal minimum of a vector as a scalar int.
473+ */
474+ ASTCENC_SIMD_INLINE int hmin_s (vint8 a)
475+ {
476+ return _mm256_cvtsi256_si32 (hmin (a).m ); // low 32 bits of the hmin() result
477+ }
478+
531479/* *
532480 * @brief Return the horizontal maximum of a vector.
533481 */
@@ -543,6 +491,14 @@ ASTCENC_SIMD_INLINE vint8 hmax(vint8 a)
543491 return vmax;
544492}
545493
494+ /* *
495+ * @brief Return the horizontal maximum of a vector as a scalar int.
496+ */
497+ ASTCENC_SIMD_INLINE int hmax_s (vint8 a)
498+ {
499+ return _mm256_cvtsi256_si32 (hmax (a).m ); // low 32 bits of the hmax() result
500+ }
501+
546502/* *
547503 * @brief Store a vector to a 16B aligned memory address.
548504 */
@@ -570,14 +526,6 @@ ASTCENC_SIMD_INLINE void store_nbytes(vint8 a, uint8_t* p)
570526 _mm_storel_epi64 (reinterpret_cast <__m128i*>(p), _mm256_extracti128_si256 (a.m , 0 ));
571527}
572528
573- /* *
574- * @brief Gather N (vector width) indices from the array.
575- */
576- ASTCENC_SIMD_INLINE vint8 gatheri (const int * base, vint8 indices)
577- {
578- return vint8 (_mm256_i32gather_epi32 (base, indices.m , 4 ));
579- }
580-
581529/* *
582530 * @brief Pack low 8 bits of N (vector width) lanes into bottom of vector.
583531 */
@@ -786,19 +734,6 @@ ASTCENC_SIMD_INLINE vfloat8 clamp(float min, float max, vfloat8 a)
786734 return a;
787735}
788736
789- /* *
790- * @brief Return a clamped value between 0.0f and max.
791- *
792- * It is assumed that @c max is not a NaN value. If @c a is NaN then zero will
793- * be returned for that lane.
794- */
795- ASTCENC_SIMD_INLINE vfloat8 clampz (float max, vfloat8 a)
796- {
797- a.m = _mm256_max_ps (a.m , _mm256_setzero_ps ());
798- a.m = _mm256_min_ps (a.m , _mm256_set1_ps (max));
799- return a;
800- }
801-
802737/* *
803738 * @brief Return a clamped value between 0.0f and 1.0f.
804739 *
@@ -857,7 +792,7 @@ ASTCENC_SIMD_INLINE vfloat8 hmin(vfloat8 a)
857792 */
858793ASTCENC_SIMD_INLINE float hmin_s (vfloat8 a)
859794{
860- return hmin (a).lane < 0 >( );
795+ return _mm256_cvtss_f32 ( hmin (a).m ); // lowest float lane of the hmin() result
861796}
862797
863798/* *
@@ -887,7 +822,7 @@ ASTCENC_SIMD_INLINE vfloat8 hmax(vfloat8 a)
887822 */
888823ASTCENC_SIMD_INLINE float hmax_s (vfloat8 a)
889824{
890- return hmax (a).lane < 0 >( );
825+ return _mm256_cvtss_f32 ( hmax (a).m ); // lowest float lane of the hmax() result
891826}
892827
893828/* *
@@ -1146,7 +1081,7 @@ ASTCENC_SIMD_INLINE vint8 vtable_8bt_32bi(vint8 t0, vint8 t1, vint8 t2, vint8 t3
11461081 * @brief Return a vector of interleaved RGBA data.
11471082 *
11481083 * Input vectors have the value stored in the bottom 8 bits of each lane,
1149- * with high bits set to zero.
1084+ * with high bits set to zero.
11501085 *
11511086 * Output vector stores a single RGBA texel packed in each lane.
11521087 */
0 commit comments