Avoid alignas(0) by wrapping in a macro (#452)

solidpixel · web-flow · commit e5e4ca4ff30f · 2024-01-11T12:18:16.000Z
C++11 states that alignas(0) should be ignored, but this is broken on at least some versions of GCC (e.g. 11.3) and generates a warning. This PR changes the use of alignas() to wrap the entire alignas() in a macro, instead of just the alignment value, allowing us to omit the annotation when the alignment is zero.
diff --git a/Docs/ChangeLog-4x.md b/Docs/ChangeLog-4x.md
@@ -18,6 +18,8 @@ The 4.7.0 release is a maintenance release.
     mode rounding rules for the alpha channel.
   * **Bug fix:** Linear LDR decompression now uses correct `decode_unorm8`
     decode mode rounding rules when writing to an 8-bit output image.
+  * **Bug fix:** Avoid using `alignas()` in the reference C implementation, as the
+    default `alignas(16)` is narrower than the native alignment on some CPUs.
   * **Feature:** Library configuration supports a new flag,
     `ASTCENC_FLG_USE_DECODE_UNORM8`. This flag indicates that the image will be
     used with the `decode_unorm8` decode mode. When set during compression
diff --git a/Source/UnitTest/test_simd.cpp b/Source/UnitTest/test_simd.cpp
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2020-2022 Arm Limited
+// Copyright 2020-2024 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -930,7 +930,7 @@ TEST(vfloat4, gatherf)
 /** @brief Test vfloat4 storea. */
 TEST(vfloat4, storea)
 {
-	alignas(16) float out[4];
+	ASTCENC_ALIGNAS float out[4];
 	vfloat4 a(f32_data);
 	storea(a, out);
 	EXPECT_EQ(out[0], 0.0f);
@@ -942,7 +942,7 @@ TEST(vfloat4, storea)
 /** @brief Test vfloat4 store. */
 TEST(vfloat4, store)
 {
-	alignas(16) float out[5];
+	ASTCENC_ALIGNAS float out[5];
 	vfloat4 a(f32_data);
 	store(a, &(out[1]));
 	EXPECT_EQ(out[1], 0.0f);
@@ -1725,7 +1725,7 @@ TEST(vint4, two_to_the_n)
 /** @brief Test vint4 storea. */
 TEST(vint4, storea)
 {
-	alignas(16) int out[4];
+	ASTCENC_ALIGNAS int out[4];
 	vint4 a(s32_data);
 	storea(a, out);
 	EXPECT_EQ(out[0], 0);
@@ -1737,7 +1737,7 @@ TEST(vint4, storea)
 /** @brief Test vint4 store. */
 TEST(vint4, store)
 {
-	alignas(16) int out[5];
+	ASTCENC_ALIGNAS int out[5];
 	vint4 a(s32_data);
 	store(a, &(out[1]));
 	EXPECT_EQ(out[1], 0);
@@ -1749,7 +1749,7 @@ TEST(vint4, store)
 /** @brief Test vint4 store_nbytes. */
 TEST(vint4, store_nbytes)
 {
-	alignas(16) int out;
+	ASTCENC_ALIGNAS int out;
 	vint4 a(42, 314, 75, 90);
 	store_nbytes(a, reinterpret_cast<uint8_t*>(&out));
 	EXPECT_EQ(out, 42);
diff --git a/Source/astcenc_compress_symbolic.cpp b/Source/astcenc_compress_symbolic.cpp
@@ -247,7 +247,7 @@ static bool realign_weights_decimated(
 		}
 
 		// Create an unquantized weight grid for this decimation level
-		alignas(ASTCENC_VECALIGN) float uq_weightsf[BLOCK_MAX_WEIGHTS];
+		ASTCENC_ALIGNAS float uq_weightsf[BLOCK_MAX_WEIGHTS];
 		for (unsigned int we_idx = 0; we_idx < weight_count; we_idx += ASTCENC_SIMD_WIDTH)
 		{
 			vint unquant_value(dec_weights_uquant + we_idx);
@@ -467,7 +467,7 @@ static float compress_symbolic_block_for_partition_1plane(
 
 		qwt_bitcounts[i] = static_cast<int8_t>(bitcount);
 
-		alignas(ASTCENC_VECALIGN) float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];
+		ASTCENC_ALIGNAS float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];
 
 		// Generate the optimized set of weights for the weight mode
 		compute_quantized_weights_for_decimation(
@@ -830,7 +830,7 @@ static float compress_symbolic_block_for_partition_2planes(
 		unsigned int decimation_mode = bm.decimation_mode;
 		const auto& di = bsd.get_decimation_info(decimation_mode);
 
-		alignas(ASTCENC_VECALIGN) float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];
+		ASTCENC_ALIGNAS float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];
 
 		// Generate the optimized set of weights for the mode
 		compute_quantized_weights_for_decimation(
diff --git a/Source/astcenc_decompress_symbolic.cpp b/Source/astcenc_decompress_symbolic.cpp
@@ -533,7 +533,7 @@ float compute_symbolic_block_difference_1plane_1partition(
 	const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
 
 	// Unquantize and undecimate the weights
-	alignas(ASTCENC_VECALIGN) int plane1_weights[BLOCK_MAX_TEXELS];
+	ASTCENC_ALIGNAS int plane1_weights[BLOCK_MAX_TEXELS];
 	unpack_weights(bsd, scb, di, false, plane1_weights, nullptr);
 
 	// Decode the color endpoints for this partition
diff --git a/Source/astcenc_entry.cpp b/Source/astcenc_entry.cpp
@@ -699,6 +699,12 @@ astcenc_error astcenc_context_alloc(
 	}
 
 	ctx->bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN);
+	if (!ctx->bsd)
+	{
+		delete ctxo;
+		return ASTCENC_ERR_OUT_OF_MEM;
+	}
+
 	bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY);
 	init_block_size_descriptor(config.block_x, config.block_y, config.block_z,
 	                           can_omit_modes,
diff --git a/Source/astcenc_ideal_endpoints_and_weights.cpp b/Source/astcenc_ideal_endpoints_and_weights.cpp
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2011-2023 Arm Limited
+// Copyright 2011-2024 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -873,7 +873,7 @@ void compute_ideal_weights_for_decimation(
 	}
 
 	// Otherwise compute an estimate and perform single refinement iteration
-	alignas(ASTCENC_VECALIGN) float infilled_weights[BLOCK_MAX_TEXELS];
+	ASTCENC_ALIGNAS float infilled_weights[BLOCK_MAX_TEXELS];
 
 	// Compute an initial average for each decimated weight
 	bool constant_wes = ei.is_constant_weight_error_scale;
@@ -1171,15 +1171,15 @@ void recompute_ideal_colors_1plane(
 	promise(total_texel_count > 0);
 	promise(partition_count > 0);
 
-	alignas(ASTCENC_VECALIGN) float dec_weight[BLOCK_MAX_WEIGHTS];
+	ASTCENC_ALIGNAS float dec_weight[BLOCK_MAX_WEIGHTS];
 	for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH)
 	{
 		vint unquant_value(dec_weights_uquant + i);
 		vfloat unquant_valuef = int_to_float(unquant_value) * vfloat(1.0f / 64.0f);
 		storea(unquant_valuef, dec_weight + i);
 	}
 
-	alignas(ASTCENC_VECALIGN) float undec_weight[BLOCK_MAX_TEXELS];
+	ASTCENC_ALIGNAS float undec_weight[BLOCK_MAX_TEXELS];
 	float* undec_weight_ref;
 	if (di.max_texel_weight_count == 1)
 	{
@@ -1394,8 +1394,8 @@ void recompute_ideal_colors_2planes(
 	promise(total_texel_count > 0);
 	promise(weight_count > 0);
 
-	alignas(ASTCENC_VECALIGN) float dec_weight_plane1[BLOCK_MAX_WEIGHTS_2PLANE];
-	alignas(ASTCENC_VECALIGN) float dec_weight_plane2[BLOCK_MAX_WEIGHTS_2PLANE];
+	ASTCENC_ALIGNAS float dec_weight_plane1[BLOCK_MAX_WEIGHTS_2PLANE];
+	ASTCENC_ALIGNAS float dec_weight_plane2[BLOCK_MAX_WEIGHTS_2PLANE];
 
 	assert(weight_count <= BLOCK_MAX_WEIGHTS_2PLANE);
 
@@ -1410,8 +1410,8 @@ void recompute_ideal_colors_2planes(
 		storea(unquant_value2f, dec_weight_plane2 + i);
 	}
 
-	alignas(ASTCENC_VECALIGN) float undec_weight_plane1[BLOCK_MAX_TEXELS];
-	alignas(ASTCENC_VECALIGN) float undec_weight_plane2[BLOCK_MAX_TEXELS];
+	ASTCENC_ALIGNAS float undec_weight_plane1[BLOCK_MAX_TEXELS];
+	ASTCENC_ALIGNAS float undec_weight_plane2[BLOCK_MAX_TEXELS];
 
 	float* undec_weight_plane1_ref;
 	float* undec_weight_plane2_ref;
diff --git a/Source/astcenc_image.cpp b/Source/astcenc_image.cpp
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2011-2022 Arm Limited
+// Copyright 2011-2024 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -109,7 +109,7 @@ static vfloat4 swz_texel(
 	vfloat4 data,
 	const astcenc_swizzle& swz
 ) {
-	alignas(16) float datas[6];
+	ASTCENC_ALIGNAS float datas[6];
 
 	storea(data, datas);
 	datas[ASTCENC_SWZ_0] = 0.0f;
diff --git a/Source/astcenc_internal.h b/Source/astcenc_internal.h
@@ -385,7 +385,7 @@ struct decimation_info
 	 * @brief The bilinear contribution of the N weights that are interpolated for each texel.
 	 * Value is between 0 and 1, stored transposed to improve vectorization.
 	 */
-	alignas(ASTCENC_VECALIGN) float texel_weight_contribs_float_tr[4][BLOCK_MAX_TEXELS];
+	ASTCENC_ALIGNAS float texel_weight_contribs_float_tr[4][BLOCK_MAX_TEXELS];
 
 	/** @brief The number of texels that each stored weight contributes to. */
 	uint8_t weight_texel_count[BLOCK_MAX_WEIGHTS];
@@ -400,7 +400,7 @@ struct decimation_info
 	 * @brief The bilinear contribution to the N texels that use each weight.
 	 * Value is between 0 and 1, stored transposed to improve vectorization.
 	 */
-	alignas(ASTCENC_VECALIGN) float weights_texel_contribs_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
+	ASTCENC_ALIGNAS float weights_texel_contribs_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
 
 	/**
 	 * @brief The bilinear contribution to the Nth texel that uses each weight.
@@ -580,7 +580,7 @@ struct block_size_descriptor
 	decimation_mode decimation_modes[WEIGHTS_MAX_DECIMATION_MODES];
 
 	/** @brief The active decimation tables, stored in low indices. */
-	alignas(ASTCENC_VECALIGN) decimation_info decimation_tables[WEIGHTS_MAX_DECIMATION_MODES];
+	ASTCENC_ALIGNAS decimation_info decimation_tables[WEIGHTS_MAX_DECIMATION_MODES];
 
 	/** @brief The packed block mode array index, or @c BLOCK_BAD_BLOCK_MODE if not active. */
 	uint16_t block_mode_packed_index[WEIGHTS_MAX_BLOCK_MODES];
@@ -740,16 +740,16 @@ struct block_size_descriptor
 struct image_block
 {
 	/** @brief The input (compress) or output (decompress) data for the red color component. */
-	alignas(ASTCENC_VECALIGN) float data_r[BLOCK_MAX_TEXELS];
+	ASTCENC_ALIGNAS float data_r[BLOCK_MAX_TEXELS];
 
 	/** @brief The input (compress) or output (decompress) data for the green color component. */
-	alignas(ASTCENC_VECALIGN) float data_g[BLOCK_MAX_TEXELS];
+	ASTCENC_ALIGNAS float data_g[BLOCK_MAX_TEXELS];
 
 	/** @brief The input (compress) or output (decompress) data for the blue color component. */
-	alignas(ASTCENC_VECALIGN) float data_b[BLOCK_MAX_TEXELS];
+	ASTCENC_ALIGNAS float data_b[BLOCK_MAX_TEXELS];
 
 	/** @brief The input (compress) or output (decompress) data for the alpha color component. */
-	alignas(ASTCENC_VECALIGN) float data_a[BLOCK_MAX_TEXELS];
+	ASTCENC_ALIGNAS float data_a[BLOCK_MAX_TEXELS];
 
 	/** @brief The number of texels in the block. */
 	uint8_t texel_count;
@@ -901,10 +901,10 @@ struct endpoints_and_weights
 	endpoints ep;
 
 	/** @brief The ideal weight for each texel; may be undecimated or decimated. */
-	alignas(ASTCENC_VECALIGN) float weights[BLOCK_MAX_TEXELS];
+	ASTCENC_ALIGNAS float weights[BLOCK_MAX_TEXELS];
 
 	/** @brief The ideal weight error scaling for each texel; may be undecimated or decimated. */
-	alignas(ASTCENC_VECALIGN) float weight_error_scale[BLOCK_MAX_TEXELS];
+	ASTCENC_ALIGNAS float weight_error_scale[BLOCK_MAX_TEXELS];
 };
 
 /**
@@ -934,7 +934,7 @@ struct encoding_choice_errors
 /**
  * @brief Preallocated working buffers, allocated per thread during context creation.
  */
-struct alignas(ASTCENC_VECALIGN) compression_working_buffers
+struct ASTCENC_ALIGNAS compression_working_buffers
 {
 	/** @brief Ideal endpoints and weights for plane 1. */
 	endpoints_and_weights ei1;
@@ -950,7 +950,7 @@ struct alignas(ASTCENC_VECALIGN) compression_working_buffers
 	 *
 	 * For two planes, second plane starts at @c WEIGHTS_PLANE2_OFFSET offsets.
 	 */
-	alignas(ASTCENC_VECALIGN) float dec_weights_ideal[WEIGHTS_MAX_DECIMATION_MODES * BLOCK_MAX_WEIGHTS];
+	ASTCENC_ALIGNAS float dec_weights_ideal[WEIGHTS_MAX_DECIMATION_MODES * BLOCK_MAX_WEIGHTS];
 
 	/**
 	 * @brief Decimated quantized weight values in the unquantized 0-64 range.
@@ -960,7 +960,7 @@ struct alignas(ASTCENC_VECALIGN) compression_working_buffers
 	uint8_t dec_weights_uquant[WEIGHTS_MAX_BLOCK_MODES * BLOCK_MAX_WEIGHTS];
 
 	/** @brief Error of the best encoding combination for each block mode. */
-	alignas(ASTCENC_VECALIGN) float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES];
+	ASTCENC_ALIGNAS float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES];
 
 	/** @brief The best color quant for each block mode. */
 	uint8_t best_quant_levels[WEIGHTS_MAX_BLOCK_MODES];
@@ -2173,10 +2173,11 @@ Platform-specific functions.
 /**
  * @brief Allocate an aligned memory buffer.
  *
- * Allocated memory must be freed by aligned_free;
+ * Allocated memory must be freed by aligned_free.
  *
  * @param size    The desired buffer size.
- * @param align   The desired buffer alignment; must be 2^N.
+ * @param align   The desired buffer alignment; must be 2^N, may be increased
+ *                by the implementation to a minimum allowable alignment.
  *
  * @return The memory buffer pointer or nullptr on allocation failure.
  */
@@ -2186,10 +2187,14 @@ T* aligned_malloc(size_t size, size_t align)
 	void* ptr;
 	int error = 0;
 
+	// Don't allow this to under-align a type
+	size_t min_align = astc::max(alignof(T), sizeof(void*));
+	size_t real_align = astc::max(min_align, align);
+
 #if defined(_WIN32)
-	ptr = _aligned_malloc(size, align);
+	ptr = _aligned_malloc(size, real_align);
 #else
-	error = posix_memalign(&ptr, align, size);
+	error = posix_memalign(&ptr, real_align, size);
 #endif
 
 	if (error || (!ptr))
diff --git a/Source/astcenc_mathlib.h b/Source/astcenc_mathlib.h
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2011-2023 Arm Limited
+// Copyright 2011-2024 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -83,6 +83,14 @@
   #define ASTCENC_VECALIGN 0
 #endif
 
+// C++11 states that alignas(0) should be ignored, but some versions of GCC
+// do not honor this, so work around it by never emitting alignas(0)
+#if ASTCENC_VECALIGN > 0
+	#define ASTCENC_ALIGNAS alignas(ASTCENC_VECALIGN)
+#else
+	#define ASTCENC_ALIGNAS
+#endif
+
 #if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
 	#include <immintrin.h>
 #endif
diff --git a/Source/astcenc_vecmathlib_avx2_8.h b/Source/astcenc_vecmathlib_avx2_8.h
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2019-2022 Arm Limited
+// Copyright 2019-2024 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -1170,7 +1170,7 @@ ASTCENC_SIMD_INLINE void store_lanes_masked(uint8_t* base, vint8 data, vmask8 ma
  */
 ASTCENC_SIMD_INLINE void print(vint8 a)
 {
-	alignas(ASTCENC_VECALIGN) int v[8];
+	alignas(32) int v[8];
 	storea(a, v);
 	printf("v8_i32:\n  %8d %8d %8d %8d %8d %8d %8d %8d\n",
 	       v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
@@ -1181,7 +1181,7 @@ ASTCENC_SIMD_INLINE void print(vint8 a)
  */
 ASTCENC_SIMD_INLINE void printx(vint8 a)
 {
-	alignas(ASTCENC_VECALIGN) int v[8];
+	alignas(32) int v[8];
 	storea(a, v);
 	printf("v8_i32:\n  %08x %08x %08x %08x %08x %08x %08x %08x\n",
 	       v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
@@ -1192,7 +1192,7 @@ ASTCENC_SIMD_INLINE void printx(vint8 a)
  */
 ASTCENC_SIMD_INLINE void print(vfloat8 a)
 {
-	alignas(ASTCENC_VECALIGN) float v[8];
+	alignas(32) float v[8];
 	storea(a, v);
 	printf("v8_f32:\n  %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f\n",
 	       static_cast<double>(v[0]), static_cast<double>(v[1]),
diff --git a/Source/astcenc_vecmathlib_common_4.h b/Source/astcenc_vecmathlib_common_4.h
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2020-2021 Arm Limited
+// Copyright 2020-2024 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -383,7 +383,7 @@ static ASTCENC_SIMD_INLINE void bit_transfer_signed(
  */
 ASTCENC_SIMD_INLINE void print(vint4 a)
 {
-	alignas(16) int v[4];
+	ASTCENC_ALIGNAS int v[4];
 	storea(a, v);
 	printf("v4_i32:\n  %8d %8d %8d %8d\n",
 	       v[0], v[1], v[2], v[3]);
@@ -394,7 +394,7 @@ ASTCENC_SIMD_INLINE void print(vint4 a)
  */
 ASTCENC_SIMD_INLINE void printx(vint4 a)
 {
-	alignas(16) int v[4];
+	ASTCENC_ALIGNAS int v[4];
 	storea(a, v);
 	printf("v4_i32:\n  %08x %08x %08x %08x\n",
 	       v[0], v[1], v[2], v[3]);
@@ -405,7 +405,7 @@ ASTCENC_SIMD_INLINE void printx(vint4 a)
  */
 ASTCENC_SIMD_INLINE void print(vfloat4 a)
 {
-	alignas(16) float v[4];
+	ASTCENC_ALIGNAS float v[4];
 	storea(a, v);
 	printf("v4_f32:\n  %0.4f %0.4f %0.4f %0.4f\n",
 	       static_cast<double>(v[0]), static_cast<double>(v[1]),
diff --git a/Source/astcenc_weight_align.cpp b/Source/astcenc_weight_align.cpp