Skip to content

Commit 21b7c1c

Browse files
authored
Move ISA check out of core library (#401)
This PR moves the responsibility for the ISA compatibility checks out of the core library, and into the calling code. This avoids complicating the library build and link, given that for most practical usage the caller either knows it's safe or must performs similar checks itself so it can dlopen() the correct dynamic library. For the command line tool, the check has been moved into the front-end wrapper. Fixes #384
1 parent 5f672d2 commit 21b7c1c

7 files changed

Lines changed: 135 additions & 93 deletions

File tree

Docs/ChangeLog-4x.md

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,18 @@ clocked at 4.2 GHz, running `astcenc` using AVX2 and 6 threads.
1111

1212
**Status:** In development
1313

14-
The 4.3.1 release is a minor development release.
14+
The 4.4.0 release is a minor development release.
1515

1616
* **General:**
17+
* **Change:** Core library no longer checks availability of required
18+
instruction set extensions, such as SSE4.1 or AVX2. Checking compatibility
19+
is now the responsibility of the caller. See `astcenc_entry.cpp` for an
20+
example of code performing this check.
1721
* **Change:** Command line errors print to stderr instead of stdout.
1822
* **Change:** Color encoding uses new quantization tables, that now factor
19-
in floating-point rounding if a distance tie is found using the integer
20-
quant256 value. This improves image quality for 4x4 and 5x5 block sizes.
23+
in floating-point rounding if a distance tie is found when using the
24+
integer quant256 value. This improves image quality for 4x4 and 5x5 block
25+
sizes.
2126
* **Optimization:** Partition selection uses a simplified line calculation
2227
with a faster approximation. This improves performance for all block sizes.
2328
* **Bug-fix:** Fixed infinity handling in debug trace JSON files.

Source/astcenc.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,14 @@
4343
* for faster processing. The caller is responsible for creating the worker threads, and
4444
* synchronizing between images.
4545
*
46+
* Extended instruction set support
47+
* ================================
48+
*
49+
* This library supports use of extended instruction sets, such as SSE4.1 and AVX2. These are
50+
* enabled at compile time when building the library. There is no runtime checking in the core
51+
* library that the instruction sets used are actually available. Checking compatibility is the
52+
* responsibility of the calling code.
53+
*
4654
* Threading
4755
* =========
4856
*
@@ -191,8 +199,6 @@ enum astcenc_error {
191199
ASTCENC_ERR_OUT_OF_MEM,
192200
/** @brief The call failed due to the build using fast math. */
193201
ASTCENC_ERR_BAD_CPU_FLOAT,
194-
/** @brief The call failed due to the build using an unsupported ISA. */
195-
ASTCENC_ERR_BAD_CPU_ISA,
196202
/** @brief The call failed due to an out-of-spec parameter. */
197203
ASTCENC_ERR_BAD_PARAM,
198204
/** @brief The call failed due to an out-of-spec block size. */

Source/astcenc_entry.cpp

Lines changed: 0 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -157,48 +157,6 @@ static astcenc_error validate_cpu_float()
157157
return ASTCENC_SUCCESS;
158158
}
159159

160-
/**
161-
* @brief Validate CPU ISA support meets the requirements of this build of the library.
162-
*
163-
* Each library build is statically compiled for a particular set of CPU ISA features, such as the
164-
* SIMD support or other ISA extensions such as POPCNT. This function checks that the host CPU
165-
* actually supports everything this build needs.
166-
*
167-
* @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
168-
*/
169-
static astcenc_error validate_cpu_isa()
170-
{
171-
#if ASTCENC_SSE >= 41
172-
if (!cpu_supports_sse41())
173-
{
174-
return ASTCENC_ERR_BAD_CPU_ISA;
175-
}
176-
#endif
177-
178-
#if ASTCENC_POPCNT >= 1
179-
if (!cpu_supports_popcnt())
180-
{
181-
return ASTCENC_ERR_BAD_CPU_ISA;
182-
}
183-
#endif
184-
185-
#if ASTCENC_F16C >= 1
186-
if (!cpu_supports_f16c())
187-
{
188-
return ASTCENC_ERR_BAD_CPU_ISA;
189-
}
190-
#endif
191-
192-
#if ASTCENC_AVX >= 2
193-
if (!cpu_supports_avx2())
194-
{
195-
return ASTCENC_ERR_BAD_CPU_ISA;
196-
}
197-
#endif
198-
199-
return ASTCENC_SUCCESS;
200-
}
201-
202160
/**
203161
* @brief Validate config profile.
204162
*
@@ -475,14 +433,6 @@ astcenc_error astcenc_config_init(
475433
) {
476434
astcenc_error status;
477435

478-
// Check basic library compatibility options here so they are checked early. Note, these checks
479-
// are repeated in context_alloc for cases where callers use a manually defined config struct
480-
status = validate_cpu_isa();
481-
if (status != ASTCENC_SUCCESS)
482-
{
483-
return status;
484-
}
485-
486436
status = validate_cpu_float();
487437
if (status != ASTCENC_SUCCESS)
488438
{
@@ -702,12 +652,6 @@ astcenc_error astcenc_context_alloc(
702652
astcenc_error status;
703653
const astcenc_config& config = *configp;
704654

705-
status = validate_cpu_isa();
706-
if (status != ASTCENC_SUCCESS)
707-
{
708-
return status;
709-
}
710-
711655
status = validate_cpu_float();
712656
if (status != ASTCENC_SUCCESS)
713657
{
@@ -1399,8 +1343,6 @@ const char* astcenc_get_error_string(
13991343
return "ASTCENC_ERR_OUT_OF_MEM";
14001344
case ASTCENC_ERR_BAD_CPU_FLOAT:
14011345
return "ASTCENC_ERR_BAD_CPU_FLOAT";
1402-
case ASTCENC_ERR_BAD_CPU_ISA:
1403-
return "ASTCENC_ERR_BAD_CPU_ISA";
14041346
case ASTCENC_ERR_BAD_PARAM:
14051347
return "ASTCENC_ERR_BAD_PARAM";
14061348
case ASTCENC_ERR_BAD_BLOCK_SIZE:
Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// ----------------------------------------------------------------------------
3-
// Copyright 2020-2022 Arm Limited
3+
// Copyright 2020-2023 Arm Limited
44
//
55
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
66
// use this file except in compliance with the License. You may obtain a copy
@@ -18,12 +18,11 @@
1818
/**
1919
* @brief Platform-specific function implementations.
2020
*
21-
* This module contains functions for querying the host extended ISA support.
21+
* This module contains the CLI entry point which also performs the role of
22+
* validating the host extended ISA support meets the needs of the tools.
2223
*/
2324

24-
// Include before the defines below to pick up any auto-setup based on compiler
25-
// built-in config, if not being set explicitly by the build system
26-
#include "astcenc_internal.h"
25+
#include "astcenccli_internal.h"
2726

2827
#if (ASTCENC_SSE > 0) || (ASTCENC_AVX > 0) || \
2928
(ASTCENC_POPCNT > 0) || (ASTCENC_F16C > 0)
@@ -163,4 +162,62 @@ bool cpu_supports_avx2()
163162
return g_cpu_has_avx2;
164163
}
165164

165+
/**
166+
* @brief Validate CPU ISA support meets the requirements of this build of the library.
167+
*
168+
* Each library build is statically compiled for a particular set of CPU ISA features, such as the
169+
* SIMD support or other ISA extensions such as POPCNT. This function checks that the host CPU
170+
* actually supports everything this build needs.
171+
*
172+
* @return Return @c true if validated, @c false otherwise.
173+
*/
174+
static bool validate_cpu_isa()
175+
{
176+
#if ASTCENC_AVX >= 2
177+
if (!cpu_supports_avx2())
178+
{
179+
print_error("ERROR: Host does not support AVX2 ISA extension\n");
180+
return false;
181+
}
182+
#endif
183+
184+
#if ASTCENC_F16C >= 1
185+
if (!cpu_supports_f16c())
186+
{
187+
print_error("ERROR: Host does not support F16C ISA extension\n");
188+
return false;
189+
}
190+
#endif
191+
192+
#if ASTCENC_SSE >= 41
193+
if (!cpu_supports_sse41())
194+
{
195+
print_error("ERROR: Host does not support SSE4.1 ISA extension\n");
196+
return false;
197+
}
198+
#endif
199+
200+
#if ASTCENC_POPCNT >= 1
201+
if (!cpu_supports_popcnt())
202+
{
203+
print_error("ERROR: Host does not support POPCNT ISA extension\n");
204+
return false;
205+
}
206+
#endif
207+
208+
return true;
209+
}
210+
166211
#endif
212+
213+
int main(
214+
int argc,
215+
char **argv
216+
) {
217+
if (!validate_cpu_isa())
218+
{
219+
return 1;
220+
}
221+
222+
return astcenc_main(argc, argv);
223+
}

Source/astcenccli_internal.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,4 +393,16 @@ void launch_threads(
393393
void (*func)(int, int, void*),
394394
void *payload);
395395

396+
/**
397+
* @brief The main entry point.
398+
*
399+
* @param argc The number of arguments.
400+
* @param argv The vector of arguments.
401+
*
402+
* @return 0 on success, non-zero otherwise.
403+
*/
404+
int astcenc_main(
405+
int argc,
406+
char **argv);
407+
396408
#endif

Source/astcenccli_toplevel.cpp

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -610,11 +610,6 @@ static int init_astcenc_config(
610610
print_error("ERROR: Block size '%s' is invalid\n", argv[4]);
611611
return 1;
612612
}
613-
else if (status == ASTCENC_ERR_BAD_CPU_ISA)
614-
{
615-
print_error("ERROR: Required SIMD ISA support missing on this CPU\n");
616-
return 1;
617-
}
618613
else if (status == ASTCENC_ERR_BAD_CPU_FLOAT)
619614
{
620615
print_error("ERROR: astcenc must not be compiled with -ffast-math\n");
@@ -1839,7 +1834,7 @@ static void print_diagnostic_images(
18391834
*
18401835
* @return 0 on success, non-zero otherwise.
18411836
*/
1842-
int main(
1837+
int astcenc_main(
18431838
int argc,
18441839
char **argv
18451840
) {

Source/cmake_core.cmake

Lines changed: 44 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# ----------------------------------------------------------------------------
3-
# Copyright 2020-2022 Arm Limited
3+
# Copyright 2020-2023 Arm Limited
44
#
55
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
66
# use this file except in compliance with the License. You may obtain a copy
@@ -46,7 +46,6 @@ add_library(${ASTC_TARGET}-static
4646
astcenc_partition_tables.cpp
4747
astcenc_percentile_tables.cpp
4848
astcenc_pick_best_endpoint_format.cpp
49-
astcenc_platform_isa_detection.cpp
5049
astcenc_quantization.cpp
5150
astcenc_symbolic_physical.cpp
5251
astcenc_weight_align.cpp
@@ -58,6 +57,11 @@ target_include_directories(${ASTC_TARGET}-static
5857
$<INSTALL_INTERFACE:.>)
5958

6059
if(${CLI})
60+
# Veneer is compiled without any extended ISA so we can safely do
61+
# ISA compatability checks without triggering a SIGILL
62+
add_library(${ASTC_TARGET}-veneer
63+
astcenccli_entry.cpp)
64+
6165
add_executable(${ASTC_TARGET}
6266
astcenccli_error_metrics.cpp
6367
astcenccli_image.cpp
@@ -69,10 +73,11 @@ if(${CLI})
6973

7074
target_link_libraries(${ASTC_TARGET}
7175
PRIVATE
76+
${ASTC_TARGET}-veneer
7277
${ASTC_TARGET}-static)
7378
endif()
7479

75-
macro(astcenc_set_properties NAME)
80+
macro(astcenc_set_properties NAME IS_VENEER)
7681

7782
target_compile_features(${NAME}
7883
PRIVATE
@@ -223,8 +228,7 @@ macro(astcenc_set_properties NAME)
223228
ASTCENC_F16C=0)
224229
endif()
225230

226-
# These settings are needed on AppleClang as SSE4.1 is on by default
227-
# Suppress unused argument for macOS universal build behavior
231+
# Force SSE2 on AppleClang (normally SSE4.1 is the default)
228232
target_compile_options(${NAME}
229233
PRIVATE
230234
$<$<CXX_COMPILER_ID:AppleClang>:-msse2>
@@ -242,11 +246,19 @@ macro(astcenc_set_properties NAME)
242246
ASTCENC_F16C=0)
243247
endif()
244248

245-
# Suppress unused argument for macOS universal build behavior
246-
target_compile_options(${NAME}
247-
PRIVATE
248-
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-msse4.1 -mpopcnt>
249-
$<$<CXX_COMPILER_ID:AppleClang>:-Wno-unused-command-line-argument>)
249+
if (${IS_VENEER})
250+
# Force SSE2 on AppleClang (normally SSE4.1 is the default)
251+
target_compile_options(${NAME}
252+
PRIVATE
253+
$<$<CXX_COMPILER_ID:AppleClang>:-msse2>
254+
$<$<CXX_COMPILER_ID:AppleClang>:-mno-sse4.1>
255+
$<$<CXX_COMPILER_ID:AppleClang>:-Wno-unused-command-line-argument>)
256+
else()
257+
target_compile_options(${NAME}
258+
PRIVATE
259+
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-msse4.1 -mpopcnt>
260+
$<$<CXX_COMPILER_ID:AppleClang>:-Wno-unused-command-line-argument>)
261+
endif()
250262

251263
elseif((${ISA_SIMD} MATCHES "avx2") OR (${UNIVERSAL_BUILD} AND ${ISA_AVX2}))
252264
if(NOT ${UNIVERSAL_BUILD})
@@ -259,20 +271,28 @@ macro(astcenc_set_properties NAME)
259271
ASTCENC_F16C=1)
260272
endif()
261273

262-
# Suppress unused argument for macOS universal build behavior
263-
target_compile_options(${NAME}
264-
PRIVATE
265-
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-mavx2 -mpopcnt -mf16c>
266-
$<$<CXX_COMPILER_ID:MSVC>:/arch:AVX2>
267-
$<$<CXX_COMPILER_ID:AppleClang>:-Wno-unused-command-line-argument>)
274+
if (${IS_VENEER})
275+
# Force SSE2 on AppleClang (normally SSE4.1 is the default)
276+
target_compile_options(${NAME}
277+
PRIVATE
278+
$<$<CXX_COMPILER_ID:AppleClang>:-msse2>
279+
$<$<CXX_COMPILER_ID:AppleClang>:-mno-sse4.1>
280+
$<$<CXX_COMPILER_ID:AppleClang>:-Wno-unused-command-line-argument>)
281+
else()
282+
target_compile_options(${NAME}
283+
PRIVATE
284+
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-mavx2 -mpopcnt -mf16c>
285+
$<$<CXX_COMPILER_ID:MSVC>:/arch:AVX2>
286+
$<$<CXX_COMPILER_ID:AppleClang>:-Wno-unused-command-line-argument>)
287+
endif()
268288

269289
# Non-invariant builds enable us to loosen the compiler constraints on
270290
# floating point, but this is only worth doing on CPUs with AVX2 because
271291
# this implies we can also enable the FMA instruction set extensions
272292
# which significantly improve performance. Note that this DOES reduce
273293
# image quality by up to 0.2 dB (normally much less), but buys an
274294
# average of 10-15% performance improvement ...
275-
if(${NO_INVARIANCE})
295+
if(${NO_INVARIANCE} AND NOT ${IS_VENEER})
276296
target_compile_options(${NAME}
277297
PRIVATE
278298
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-mfma>)
@@ -309,19 +329,24 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
309329
COMPILE_FLAGS ${EXTERNAL_CXX_FLAGS})
310330
endif()
311331

312-
astcenc_set_properties(${ASTC_TARGET}-static)
332+
astcenc_set_properties(${ASTC_TARGET}-static OFF)
313333

314334
target_compile_options(${ASTC_TARGET}-static
315335
PRIVATE
316336
$<$<CXX_COMPILER_ID:MSVC>:/W4>)
317337

318338
if(${CLI})
319-
astcenc_set_properties(${ASTC_TARGET})
339+
astcenc_set_properties(${ASTC_TARGET}-veneer ON)
340+
astcenc_set_properties(${ASTC_TARGET} OFF)
320341

321342
target_compile_options(${ASTC_TARGET}
322343
PRIVATE
323344
$<$<CXX_COMPILER_ID:MSVC>:/W3>)
324345

346+
target_compile_options(${ASTC_TARGET}-veneer
347+
PRIVATE
348+
$<$<CXX_COMPILER_ID:MSVC>:/W3>)
349+
325350
string(TIMESTAMP astcencoder_YEAR "%Y")
326351

327352
configure_file(

0 commit comments

Comments
 (0)