Skip to content

Commit e5e4ca4

Browse files
authored
Avoid alignas(0) by wrapping in a macro (#452)
C++11 states that alignas(0) should be ignored, but this is broken on at least some versions of GCC (e.g. 11.3) and generates a warning. This PR changes the use of alignas() to wrap the entire alignas() in a macro, instead of just the alignment value, allowing us to omit the annotation when the alignment is zero.
1 parent b0ca583 commit e5e4ca4

12 files changed

Lines changed: 76 additions & 55 deletions

Docs/ChangeLog-4x.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ The 4.7.0 release is a maintenance release.
1818
mode rounding rules for the alpha channel.
1919
* **Bug fix:** Linear LDR decompression now uses correct `decode_unorm8`
2020
decode mode rounding rules when writing to an 8-bit output image.
21+
* **Bug fix:** Avoid using `alignas()` in the reference C implementation, as the
22+
default `alignas(16)` is narrower than the native alignment on some CPUs.
2123
* **Feature:** Library configuration supports a new flag,
2224
`ASTCENC_FLG_USE_DECODE_UNORM8`. This flag indicates that the image will be
2325
used with the `decode_unorm8` decode mode. When set during compression

Source/UnitTest/test_simd.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// ----------------------------------------------------------------------------
3-
// Copyright 2020-2022 Arm Limited
3+
// Copyright 2020-2024 Arm Limited
44
//
55
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
66
// use this file except in compliance with the License. You may obtain a copy
@@ -930,7 +930,7 @@ TEST(vfloat4, gatherf)
930930
/** @brief Test vfloat4 storea. */
931931
TEST(vfloat4, storea)
932932
{
933-
alignas(16) float out[4];
933+
ASTCENC_ALIGNAS float out[4];
934934
vfloat4 a(f32_data);
935935
storea(a, out);
936936
EXPECT_EQ(out[0], 0.0f);
@@ -942,7 +942,7 @@ TEST(vfloat4, storea)
942942
/** @brief Test vfloat4 store. */
943943
TEST(vfloat4, store)
944944
{
945-
alignas(16) float out[5];
945+
ASTCENC_ALIGNAS float out[5];
946946
vfloat4 a(f32_data);
947947
store(a, &(out[1]));
948948
EXPECT_EQ(out[1], 0.0f);
@@ -1725,7 +1725,7 @@ TEST(vint4, two_to_the_n)
17251725
/** @brief Test vint4 storea. */
17261726
TEST(vint4, storea)
17271727
{
1728-
alignas(16) int out[4];
1728+
ASTCENC_ALIGNAS int out[4];
17291729
vint4 a(s32_data);
17301730
storea(a, out);
17311731
EXPECT_EQ(out[0], 0);
@@ -1737,7 +1737,7 @@ TEST(vint4, storea)
17371737
/** @brief Test vint4 store. */
17381738
TEST(vint4, store)
17391739
{
1740-
alignas(16) int out[5];
1740+
ASTCENC_ALIGNAS int out[5];
17411741
vint4 a(s32_data);
17421742
store(a, &(out[1]));
17431743
EXPECT_EQ(out[1], 0);
@@ -1749,7 +1749,7 @@ TEST(vint4, store)
17491749
/** @brief Test vint4 store_nbytes. */
17501750
TEST(vint4, store_nbytes)
17511751
{
1752-
alignas(16) int out;
1752+
ASTCENC_ALIGNAS int out;
17531753
vint4 a(42, 314, 75, 90);
17541754
store_nbytes(a, reinterpret_cast<uint8_t*>(&out));
17551755
EXPECT_EQ(out, 42);

Source/astcenc_compress_symbolic.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ static bool realign_weights_decimated(
247247
}
248248

249249
// Create an unquantized weight grid for this decimation level
250-
alignas(ASTCENC_VECALIGN) float uq_weightsf[BLOCK_MAX_WEIGHTS];
250+
ASTCENC_ALIGNAS float uq_weightsf[BLOCK_MAX_WEIGHTS];
251251
for (unsigned int we_idx = 0; we_idx < weight_count; we_idx += ASTCENC_SIMD_WIDTH)
252252
{
253253
vint unquant_value(dec_weights_uquant + we_idx);
@@ -467,7 +467,7 @@ static float compress_symbolic_block_for_partition_1plane(
467467

468468
qwt_bitcounts[i] = static_cast<int8_t>(bitcount);
469469

470-
alignas(ASTCENC_VECALIGN) float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];
470+
ASTCENC_ALIGNAS float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];
471471

472472
// Generate the optimized set of weights for the weight mode
473473
compute_quantized_weights_for_decimation(
@@ -830,7 +830,7 @@ static float compress_symbolic_block_for_partition_2planes(
830830
unsigned int decimation_mode = bm.decimation_mode;
831831
const auto& di = bsd.get_decimation_info(decimation_mode);
832832

833-
alignas(ASTCENC_VECALIGN) float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];
833+
ASTCENC_ALIGNAS float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];
834834

835835
// Generate the optimized set of weights for the mode
836836
compute_quantized_weights_for_decimation(

Source/astcenc_decompress_symbolic.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ float compute_symbolic_block_difference_1plane_1partition(
533533
const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
534534

535535
// Unquantize and undecimate the weights
536-
alignas(ASTCENC_VECALIGN) int plane1_weights[BLOCK_MAX_TEXELS];
536+
ASTCENC_ALIGNAS int plane1_weights[BLOCK_MAX_TEXELS];
537537
unpack_weights(bsd, scb, di, false, plane1_weights, nullptr);
538538

539539
// Decode the color endpoints for this partition

Source/astcenc_entry.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,12 @@ astcenc_error astcenc_context_alloc(
699699
}
700700

701701
ctx->bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN);
702+
if (!ctx->bsd)
703+
{
704+
delete ctxo;
705+
return ASTCENC_ERR_OUT_OF_MEM;
706+
}
707+
702708
bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY);
703709
init_block_size_descriptor(config.block_x, config.block_y, config.block_z,
704710
can_omit_modes,

Source/astcenc_ideal_endpoints_and_weights.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// ----------------------------------------------------------------------------
3-
// Copyright 2011-2023 Arm Limited
3+
// Copyright 2011-2024 Arm Limited
44
//
55
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
66
// use this file except in compliance with the License. You may obtain a copy
@@ -873,7 +873,7 @@ void compute_ideal_weights_for_decimation(
873873
}
874874

875875
// Otherwise compute an estimate and perform single refinement iteration
876-
alignas(ASTCENC_VECALIGN) float infilled_weights[BLOCK_MAX_TEXELS];
876+
ASTCENC_ALIGNAS float infilled_weights[BLOCK_MAX_TEXELS];
877877

878878
// Compute an initial average for each decimated weight
879879
bool constant_wes = ei.is_constant_weight_error_scale;
@@ -1171,15 +1171,15 @@ void recompute_ideal_colors_1plane(
11711171
promise(total_texel_count > 0);
11721172
promise(partition_count > 0);
11731173

1174-
alignas(ASTCENC_VECALIGN) float dec_weight[BLOCK_MAX_WEIGHTS];
1174+
ASTCENC_ALIGNAS float dec_weight[BLOCK_MAX_WEIGHTS];
11751175
for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH)
11761176
{
11771177
vint unquant_value(dec_weights_uquant + i);
11781178
vfloat unquant_valuef = int_to_float(unquant_value) * vfloat(1.0f / 64.0f);
11791179
storea(unquant_valuef, dec_weight + i);
11801180
}
11811181

1182-
alignas(ASTCENC_VECALIGN) float undec_weight[BLOCK_MAX_TEXELS];
1182+
ASTCENC_ALIGNAS float undec_weight[BLOCK_MAX_TEXELS];
11831183
float* undec_weight_ref;
11841184
if (di.max_texel_weight_count == 1)
11851185
{
@@ -1394,8 +1394,8 @@ void recompute_ideal_colors_2planes(
13941394
promise(total_texel_count > 0);
13951395
promise(weight_count > 0);
13961396

1397-
alignas(ASTCENC_VECALIGN) float dec_weight_plane1[BLOCK_MAX_WEIGHTS_2PLANE];
1398-
alignas(ASTCENC_VECALIGN) float dec_weight_plane2[BLOCK_MAX_WEIGHTS_2PLANE];
1397+
ASTCENC_ALIGNAS float dec_weight_plane1[BLOCK_MAX_WEIGHTS_2PLANE];
1398+
ASTCENC_ALIGNAS float dec_weight_plane2[BLOCK_MAX_WEIGHTS_2PLANE];
13991399

14001400
assert(weight_count <= BLOCK_MAX_WEIGHTS_2PLANE);
14011401

@@ -1410,8 +1410,8 @@ void recompute_ideal_colors_2planes(
14101410
storea(unquant_value2f, dec_weight_plane2 + i);
14111411
}
14121412

1413-
alignas(ASTCENC_VECALIGN) float undec_weight_plane1[BLOCK_MAX_TEXELS];
1414-
alignas(ASTCENC_VECALIGN) float undec_weight_plane2[BLOCK_MAX_TEXELS];
1413+
ASTCENC_ALIGNAS float undec_weight_plane1[BLOCK_MAX_TEXELS];
1414+
ASTCENC_ALIGNAS float undec_weight_plane2[BLOCK_MAX_TEXELS];
14151415

14161416
float* undec_weight_plane1_ref;
14171417
float* undec_weight_plane2_ref;

Source/astcenc_image.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// ----------------------------------------------------------------------------
3-
// Copyright 2011-2022 Arm Limited
3+
// Copyright 2011-2024 Arm Limited
44
//
55
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
66
// use this file except in compliance with the License. You may obtain a copy
@@ -109,7 +109,7 @@ static vfloat4 swz_texel(
109109
vfloat4 data,
110110
const astcenc_swizzle& swz
111111
) {
112-
alignas(16) float datas[6];
112+
ASTCENC_ALIGNAS float datas[6];
113113

114114
storea(data, datas);
115115
datas[ASTCENC_SWZ_0] = 0.0f;

Source/astcenc_internal.h

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ struct decimation_info
385385
* @brief The bilinear contribution of the N weights that are interpolated for each texel.
386386
* Value is between 0 and 1, stored transposed to improve vectorization.
387387
*/
388-
alignas(ASTCENC_VECALIGN) float texel_weight_contribs_float_tr[4][BLOCK_MAX_TEXELS];
388+
ASTCENC_ALIGNAS float texel_weight_contribs_float_tr[4][BLOCK_MAX_TEXELS];
389389

390390
/** @brief The number of texels that each stored weight contributes to. */
391391
uint8_t weight_texel_count[BLOCK_MAX_WEIGHTS];
@@ -400,7 +400,7 @@ struct decimation_info
400400
* @brief The bilinear contribution to the N texels that use each weight.
401401
* Value is between 0 and 1, stored transposed to improve vectorization.
402402
*/
403-
alignas(ASTCENC_VECALIGN) float weights_texel_contribs_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
403+
ASTCENC_ALIGNAS float weights_texel_contribs_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
404404

405405
/**
406406
* @brief The bilinear contribution to the Nth texel that uses each weight.
@@ -580,7 +580,7 @@ struct block_size_descriptor
580580
decimation_mode decimation_modes[WEIGHTS_MAX_DECIMATION_MODES];
581581

582582
/** @brief The active decimation tables, stored in low indices. */
583-
alignas(ASTCENC_VECALIGN) decimation_info decimation_tables[WEIGHTS_MAX_DECIMATION_MODES];
583+
ASTCENC_ALIGNAS decimation_info decimation_tables[WEIGHTS_MAX_DECIMATION_MODES];
584584

585585
/** @brief The packed block mode array index, or @c BLOCK_BAD_BLOCK_MODE if not active. */
586586
uint16_t block_mode_packed_index[WEIGHTS_MAX_BLOCK_MODES];
@@ -740,16 +740,16 @@ struct block_size_descriptor
740740
struct image_block
741741
{
742742
/** @brief The input (compress) or output (decompress) data for the red color component. */
743-
alignas(ASTCENC_VECALIGN) float data_r[BLOCK_MAX_TEXELS];
743+
ASTCENC_ALIGNAS float data_r[BLOCK_MAX_TEXELS];
744744

745745
/** @brief The input (compress) or output (decompress) data for the green color component. */
746-
alignas(ASTCENC_VECALIGN) float data_g[BLOCK_MAX_TEXELS];
746+
ASTCENC_ALIGNAS float data_g[BLOCK_MAX_TEXELS];
747747

748748
/** @brief The input (compress) or output (decompress) data for the blue color component. */
749-
alignas(ASTCENC_VECALIGN) float data_b[BLOCK_MAX_TEXELS];
749+
ASTCENC_ALIGNAS float data_b[BLOCK_MAX_TEXELS];
750750

751751
/** @brief The input (compress) or output (decompress) data for the alpha color component. */
752-
alignas(ASTCENC_VECALIGN) float data_a[BLOCK_MAX_TEXELS];
752+
ASTCENC_ALIGNAS float data_a[BLOCK_MAX_TEXELS];
753753

754754
/** @brief The number of texels in the block. */
755755
uint8_t texel_count;
@@ -901,10 +901,10 @@ struct endpoints_and_weights
901901
endpoints ep;
902902

903903
/** @brief The ideal weight for each texel; may be undecimated or decimated. */
904-
alignas(ASTCENC_VECALIGN) float weights[BLOCK_MAX_TEXELS];
904+
ASTCENC_ALIGNAS float weights[BLOCK_MAX_TEXELS];
905905

906906
/** @brief The ideal weight error scaling for each texel; may be undecimated or decimated. */
907-
alignas(ASTCENC_VECALIGN) float weight_error_scale[BLOCK_MAX_TEXELS];
907+
ASTCENC_ALIGNAS float weight_error_scale[BLOCK_MAX_TEXELS];
908908
};
909909

910910
/**
@@ -934,7 +934,7 @@ struct encoding_choice_errors
934934
/**
935935
* @brief Preallocated working buffers, allocated per thread during context creation.
936936
*/
937-
struct alignas(ASTCENC_VECALIGN) compression_working_buffers
937+
struct ASTCENC_ALIGNAS compression_working_buffers
938938
{
939939
/** @brief Ideal endpoints and weights for plane 1. */
940940
endpoints_and_weights ei1;
@@ -950,7 +950,7 @@ struct alignas(ASTCENC_VECALIGN) compression_working_buffers
950950
*
951951
* For two planes, second plane starts at @c WEIGHTS_PLANE2_OFFSET offsets.
952952
*/
953-
alignas(ASTCENC_VECALIGN) float dec_weights_ideal[WEIGHTS_MAX_DECIMATION_MODES * BLOCK_MAX_WEIGHTS];
953+
ASTCENC_ALIGNAS float dec_weights_ideal[WEIGHTS_MAX_DECIMATION_MODES * BLOCK_MAX_WEIGHTS];
954954

955955
/**
956956
* @brief Decimated quantized weight values in the unquantized 0-64 range.
@@ -960,7 +960,7 @@ struct alignas(ASTCENC_VECALIGN) compression_working_buffers
960960
uint8_t dec_weights_uquant[WEIGHTS_MAX_BLOCK_MODES * BLOCK_MAX_WEIGHTS];
961961

962962
/** @brief Error of the best encoding combination for each block mode. */
963-
alignas(ASTCENC_VECALIGN) float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES];
963+
ASTCENC_ALIGNAS float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES];
964964

965965
/** @brief The best color quant for each block mode. */
966966
uint8_t best_quant_levels[WEIGHTS_MAX_BLOCK_MODES];
@@ -2173,10 +2173,11 @@ Platform-specific functions.
21732173
/**
21742174
* @brief Allocate an aligned memory buffer.
21752175
*
2176-
* Allocated memory must be freed by aligned_free;
2176+
* Allocated memory must be freed by aligned_free.
21772177
*
21782178
* @param size The desired buffer size.
2179-
* @param align The desired buffer alignment; must be 2^N.
2179+
* @param align The desired buffer alignment; must be 2^N, may be increased
2180+
* by the implementation to a minimum allowable alignment.
21802181
*
21812182
* @return The memory buffer pointer or nullptr on allocation failure.
21822183
*/
@@ -2186,10 +2187,14 @@ T* aligned_malloc(size_t size, size_t align)
21862187
void* ptr;
21872188
int error = 0;
21882189

2190+
// Don't allow this to under-align a type
2191+
size_t min_align = astc::max(alignof(T), sizeof(void*));
2192+
size_t real_align = astc::max(min_align, align);
2193+
21892194
#if defined(_WIN32)
2190-
ptr = _aligned_malloc(size, align);
2195+
ptr = _aligned_malloc(size, real_align);
21912196
#else
2192-
error = posix_memalign(&ptr, align, size);
2197+
error = posix_memalign(&ptr, real_align, size);
21932198
#endif
21942199

21952200
if (error || (!ptr))

Source/astcenc_mathlib.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// ----------------------------------------------------------------------------
3-
// Copyright 2011-2023 Arm Limited
3+
// Copyright 2011-2024 Arm Limited
44
//
55
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
66
// use this file except in compliance with the License. You may obtain a copy
@@ -83,6 +83,14 @@
8383
#define ASTCENC_VECALIGN 0
8484
#endif
8585

86+
// C++11 states that alignas(0) should be ignored but GCC doesn't do
87+
// this on some versions, so work around it and avoid emitting alignas(0)
88+
#if ASTCENC_VECALIGN > 0
89+
#define ASTCENC_ALIGNAS alignas(ASTCENC_VECALIGN)
90+
#else
91+
#define ASTCENC_ALIGNAS
92+
#endif
93+
8694
#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
8795
#include <immintrin.h>
8896
#endif

Source/astcenc_vecmathlib_avx2_8.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// ----------------------------------------------------------------------------
3-
// Copyright 2019-2022 Arm Limited
3+
// Copyright 2019-2024 Arm Limited
44
//
55
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
66
// use this file except in compliance with the License. You may obtain a copy
@@ -1170,7 +1170,7 @@ ASTCENC_SIMD_INLINE void store_lanes_masked(uint8_t* base, vint8 data, vmask8 ma
11701170
*/
11711171
ASTCENC_SIMD_INLINE void print(vint8 a)
11721172
{
1173-
alignas(ASTCENC_VECALIGN) int v[8];
1173+
alignas(32) int v[8];
11741174
storea(a, v);
11751175
printf("v8_i32:\n %8d %8d %8d %8d %8d %8d %8d %8d\n",
11761176
v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
@@ -1181,7 +1181,7 @@ ASTCENC_SIMD_INLINE void print(vint8 a)
11811181
*/
11821182
ASTCENC_SIMD_INLINE void printx(vint8 a)
11831183
{
1184-
alignas(ASTCENC_VECALIGN) int v[8];
1184+
alignas(32) int v[8];
11851185
storea(a, v);
11861186
printf("v8_i32:\n %08x %08x %08x %08x %08x %08x %08x %08x\n",
11871187
v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
@@ -1192,7 +1192,7 @@ ASTCENC_SIMD_INLINE void printx(vint8 a)
11921192
*/
11931193
ASTCENC_SIMD_INLINE void print(vfloat8 a)
11941194
{
1195-
alignas(ASTCENC_VECALIGN) float v[8];
1195+
alignas(32) float v[8];
11961196
storea(a, v);
11971197
printf("v8_f32:\n %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f\n",
11981198
static_cast<double>(v[0]), static_cast<double>(v[1]),

0 commit comments

Comments
 (0)