Skip to content

Commit 33120f5

Browse files
committed
Store weight_bits in block_mode structure
1 parent 2512930 commit 33120f5

4 files changed

Lines changed: 49 additions & 55 deletions

File tree

Source/astcenc_block_sizes.cpp

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
* @param[out] y_weights The number of weights in the Y dimension.
3030
* @param[out] is_dual_plane True if this block mode has two weight planes.
3131
* @param[out] quant_mode The quantization level for the weights.
32+
* @param[out] weight_bits The storage bit count for the weights.
3233
*
3334
* @return Returns true if a valid mode, false otherwise.
3435
*/
@@ -37,7 +38,8 @@ static bool decode_block_mode_2d(
3738
unsigned int& x_weights,
3839
unsigned int& y_weights,
3940
bool& is_dual_plane,
40-
unsigned int& quant_mode
41+
unsigned int& quant_mode,
42+
unsigned int& weight_bits
4143
) {
4244
unsigned int base_quant_mode = (block_mode >> 4) & 1;
4345
unsigned int H = (block_mode >> 9) & 1;
@@ -128,7 +130,7 @@ static bool decode_block_mode_2d(
128130
quant_mode = (base_quant_mode - 2) + 6 * H;
129131
is_dual_plane = D != 0;
130132

131-
unsigned int weight_bits = get_ise_sequence_bitcount(weight_count, (quant_method)quant_mode);
133+
weight_bits = get_ise_sequence_bitcount(weight_count, (quant_method)quant_mode);
132134
return (weight_count <= BLOCK_MAX_WEIGHTS &&
133135
weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
134136
weight_bits <= BLOCK_MAX_WEIGHT_BITS);
@@ -143,6 +145,7 @@ static bool decode_block_mode_2d(
143145
* @param[out] z_weights The number of weights in the Z dimension.
144146
* @param[out] is_dual_plane True if this block mode has two weight planes.
145147
* @param[out] quant_mode The quantization level for the weights.
148+
* @param[out] weight_bits The storage bit count for the weights.
146149
*
147150
* @return Returns true if a valid mode, false otherwise.
148151
*/
@@ -152,7 +155,8 @@ static bool decode_block_mode_3d(
152155
unsigned int& y_weights,
153156
unsigned int& z_weights,
154157
bool& is_dual_plane,
155-
unsigned int& quant_mode
158+
unsigned int& quant_mode,
159+
unsigned int& weight_bits
156160
) {
157161
unsigned int base_quant_mode = (block_mode >> 4) & 1;
158162
unsigned int H = (block_mode >> 9) & 1;
@@ -229,7 +233,7 @@ static bool decode_block_mode_3d(
229233
quant_mode = (base_quant_mode - 2) + 6 * H;
230234
is_dual_plane = D != 0;
231235

232-
unsigned int weight_bits = get_ise_sequence_bitcount(weight_count, (quant_method)quant_mode);
236+
weight_bits = get_ise_sequence_bitcount(weight_count, (quant_method)quant_mode);
233237
return (weight_count <= BLOCK_MAX_WEIGHTS &&
234238
weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
235239
weight_bits <= BLOCK_MAX_WEIGHT_BITS);
@@ -857,10 +861,11 @@ static void construct_block_size_descriptor_2d(
857861
{
858862
for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
859863
{
860-
unsigned int x_weights, y_weights;
864+
unsigned int x_weights;
865+
unsigned int y_weights;
861866
bool is_dual_plane;
862-
863867
unsigned int quant_mode;
868+
unsigned int weight_bits;
864869

865870
#if !defined(ASTCENC_DECOMPRESS_ONLY)
866871
float percentile = percentiles[i];
@@ -893,7 +898,7 @@ static void construct_block_size_descriptor_2d(
893898
// is technically permitted by the specification.
894899

895900
// Skip modes that are invalid, too large, or not selected by heuristic
896-
bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode);
901+
bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode, weight_bits);
897902
if (!selected || !valid || (x_weights > x_texels) || (y_weights > y_texels))
898903
{
899904
bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
@@ -937,6 +942,7 @@ static void construct_block_size_descriptor_2d(
937942
bsd.block_modes[packed_idx].decimation_mode = static_cast<uint8_t>(decimation_mode);
938943
bsd.block_modes[packed_idx].quant_mode = static_cast<uint8_t>(quant_mode);
939944
bsd.block_modes[packed_idx].is_dual_plane = static_cast<uint8_t>(is_dual_plane);
945+
bsd.block_modes[packed_idx].weight_bits = static_cast<uint8_t>(weight_bits);
940946
bsd.block_modes[packed_idx].mode_index = static_cast<uint16_t>(i);
941947
bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_idx);
942948
packed_idx++;
@@ -1067,12 +1073,15 @@ static void construct_block_size_descriptor_3d(
10671073
unsigned int packed_idx = 0;
10681074
for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
10691075
{
1070-
unsigned int x_weights, y_weights, z_weights;
1076+
unsigned int x_weights;
1077+
unsigned int y_weights;
1078+
unsigned int z_weights;
10711079
bool is_dual_plane;
10721080
unsigned int quant_mode;
1081+
unsigned int weight_bits;
10731082
bool permit_encode = true;
10741083

1075-
if (decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode))
1084+
if (decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode, weight_bits))
10761085
{
10771086
if (x_weights > x_texels || y_weights > y_texels || z_weights > z_texels)
10781087
{
@@ -1093,6 +1102,7 @@ static void construct_block_size_descriptor_3d(
10931102
int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];
10941103
bsd.block_modes[packed_idx].decimation_mode = static_cast<uint8_t>(decimation_mode);
10951104
bsd.block_modes[packed_idx].quant_mode = static_cast<uint8_t>(quant_mode);
1105+
bsd.block_modes[packed_idx].weight_bits = static_cast<uint8_t>(weight_bits);
10961106
bsd.block_modes[packed_idx].is_dual_plane = static_cast<uint8_t>(is_dual_plane);
10971107
bsd.block_modes[packed_idx].mode_index = static_cast<uint16_t>(i);
10981108

Source/astcenc_compress_symbolic.cpp

Lines changed: 16 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -388,10 +388,6 @@ static float compress_symbolic_block_for_partition_1plane(
388388
promise(config.tune_refinement_limit > 0);
389389
promise(bsd.decimation_mode_count > 0);
390390

391-
static const int free_bits_for_partition_count[5] {
392-
0, 115 - 4, 111 - 4 - PARTITION_INDEX_BITS, 108 - 4 - PARTITION_INDEX_BITS, 105 - 4 - PARTITION_INDEX_BITS
393-
};
394-
395391
const auto& pi = bsd.get_partition_info(partition_count, partition_index);
396392

397393
// Compute ideal weights and endpoint colors, with no quantization or decimation
@@ -455,19 +451,21 @@ static float compress_symbolic_block_for_partition_1plane(
455451
// * Generate an optimized set of quantized weights
456452
// * Compute quantization errors for the mode
457453

458-
for (unsigned int i = 0; i < bsd.block_mode_count; ++i)
459-
{
460-
qwt_errors[i] = 1e38f;
461-
}
454+
455+
static const int8_t free_bits_for_partition_count[4] {
456+
115 - 4, 111 - 4 - PARTITION_INDEX_BITS, 108 - 4 - PARTITION_INDEX_BITS, 105 - 4 - PARTITION_INDEX_BITS
457+
};
462458

463459
unsigned int max_block_modes = only_always ? bsd.always_block_mode_count
464460
: bsd.block_mode_count;
465461
promise(max_block_modes > 0);
466462
for (unsigned int i = 0; i < max_block_modes; ++i)
467463
{
468464
const block_mode& bm = bsd.block_modes[i];
469-
if (bm.is_dual_plane || !bm.percentile_hit)
465+
int bitcount = free_bits_for_partition_count[partition_count - 1] - bm.weight_bits;
466+
if (bm.is_dual_plane || !bm.percentile_hit || bitcount <= 0)
470467
{
468+
qwt_errors[i] = 1e38f;
471469
continue;
472470
}
473471

@@ -479,17 +477,6 @@ static float compress_symbolic_block_for_partition_1plane(
479477
int decimation_mode = bm.decimation_mode;
480478
const auto& di = bsd.get_decimation_info(decimation_mode);
481479

482-
// Compute weight bitcount for the mode
483-
unsigned int bits_used_by_weights = get_ise_sequence_bitcount(
484-
di.weight_count,
485-
bm.get_weight_quant_mode());
486-
487-
int bitcount = free_bits_for_partition_count[partition_count] - bits_used_by_weights;
488-
if (bitcount <= 0)
489-
{
490-
continue;
491-
}
492-
493480
qwt_bitcounts[i] = bitcount;
494481

495482
// Generate the optimized set of weights for the weight mode
@@ -517,7 +504,8 @@ static float compress_symbolic_block_for_partition_1plane(
517504

518505
unsigned int candidate_count = compute_ideal_endpoint_formats(
519506
bsd, pi, blk, ei.ep, qwt_bitcounts, qwt_errors,
520-
config.tune_candidate_limit, partition_format_specifiers, block_mode_index,
507+
config.tune_candidate_limit, max_block_modes,
508+
partition_format_specifiers, block_mode_index,
521509
color_quant_level, color_quant_level_mod, tmpbuf);
522510

523511
// Iterate over the N believed-to-be-best modes to find out which one is actually best
@@ -832,14 +820,14 @@ static float compress_symbolic_block_for_partition_2planes(
832820
for (unsigned int i = 0; i < bsd.block_mode_count; ++i)
833821
{
834822
const block_mode& bm = bsd.block_modes[i];
835-
if (!bm.is_dual_plane || !bm.percentile_hit)
823+
int bitcount = 109 - bm.weight_bits;
824+
if (!bm.is_dual_plane || !bm.percentile_hit || bitcount <= 0)
836825
{
837826
qwt_errors[i] = 1e38f;
838827
continue;
839828
}
840829

841-
unsigned int decimation_mode = bm.decimation_mode;
842-
const auto& di = bsd.get_decimation_info(decimation_mode);
830+
qwt_bitcounts[i] = bitcount;
843831

844832
if (weight_high_value1[i] > 1.02f * min_wt_cutoff1)
845833
{
@@ -851,18 +839,8 @@ static float compress_symbolic_block_for_partition_2planes(
851839
weight_high_value2[i] = 1.0f;
852840
}
853841

854-
// Compute weight bitcount for the mode
855-
unsigned int bits_used_by_weights = get_ise_sequence_bitcount(
856-
2 * di.weight_count,
857-
bm.get_weight_quant_mode());
858-
859-
int bitcount = 113 - 4 - bits_used_by_weights;
860-
if (bitcount <= 0)
861-
{
862-
continue;
863-
}
864-
865-
qwt_bitcounts[i] = bitcount;
842+
unsigned int decimation_mode = bm.decimation_mode;
843+
const auto& di = bsd.get_decimation_info(decimation_mode);
866844

867845
// Generate the optimized set of weights for the mode
868846
compute_quantized_weights_for_decimation(
@@ -905,7 +883,8 @@ static float compress_symbolic_block_for_partition_2planes(
905883
const auto& pi = bsd.get_partition_info(1, 0);
906884
unsigned int candidate_count = compute_ideal_endpoint_formats(
907885
bsd, pi, blk, epm, qwt_bitcounts, qwt_errors,
908-
config.tune_candidate_limit, partition_format_specifiers, block_mode_index,
886+
config.tune_candidate_limit, bsd.block_mode_count,
887+
partition_format_specifiers, block_mode_index,
909888
color_quant_level, color_quant_level_mod, tmpbuf);
910889

911890
// Iterate over the N believed-to-be-best modes to find out which one is actually best

Source/astcenc_internal.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,9 @@ struct block_mode
600600
/** @brief The weight quantization used by this block mode. */
601601
uint8_t quant_mode;
602602

603+
/** @brief The storage bit count for the weights used by this block mode. */
604+
uint8_t weight_bits;
605+
603606
/** @brief Is a dual weight plane used by this block mode? */
604607
uint8_t is_dual_plane : 1;
605608

@@ -1979,6 +1982,7 @@ void unpack_weights(
19791982
* @param qwt_bitcounts Bit counts for different quantization methods.
19801983
* @param qwt_errors Errors for different quantization methods.
19811984
* @param tune_candidate_limit The max number of candidates to return, may be less.
1985+
* @param block_mode_count The number of block mode candidates to inspect.
19821986
* @param[out] partition_format_specifiers The best formats per partition.
19831987
* @param[out] block_mode The best packed block mode indexes.
19841988
* @param[out] quant_level The best color quant level.
@@ -1995,6 +1999,7 @@ unsigned int compute_ideal_endpoint_formats(
19951999
const int* qwt_bitcounts,
19962000
const float* qwt_errors,
19972001
unsigned int tune_candidate_limit,
2002+
unsigned int block_mode_count,
19982003
int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][BLOCK_MAX_PARTITIONS],
19992004
int block_mode[TUNE_MAX_TRIAL_CANDIDATES],
20002005
quant_method quant_level[TUNE_MAX_TRIAL_CANDIDATES],

Source/astcenc_pick_best_endpoint_format.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,6 +1048,7 @@ unsigned int compute_ideal_endpoint_formats(
10481048
const int* qwt_bitcounts,
10491049
const float* qwt_errors,
10501050
unsigned int tune_candidate_limit,
1051+
unsigned int block_mode_count,
10511052
// output data
10521053
int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][BLOCK_MAX_PARTITIONS],
10531054
int block_mode[TUNE_MAX_TRIAL_CANDIDATES],
@@ -1058,7 +1059,7 @@ unsigned int compute_ideal_endpoint_formats(
10581059
int partition_count = pi.partition_count;
10591060

10601061
promise(partition_count > 0);
1061-
promise(bsd.block_mode_count > 0);
1062+
promise(block_mode_count > 0);
10621063

10631064
int encode_hdr_rgb = blk.rgb_lns[0];
10641065
int encode_hdr_alpha = blk.alpha_lns[0];
@@ -1085,9 +1086,8 @@ unsigned int compute_ideal_endpoint_formats(
10851086

10861087
// Ensure that the "overstep" of the last iteration in the vectorized loop will contain data
10871088
// that will never be picked as best candidate
1088-
const int packed_mode_count = bsd.block_mode_count;
1089-
const int packed_mode_count_simd_up = round_up_to_simd_multiple_vla(packed_mode_count);
1090-
for (int i = packed_mode_count; i < packed_mode_count_simd_up; i++)
1089+
const int packed_mode_count_simd_up = round_up_to_simd_multiple_vla(block_mode_count);
1090+
for (int i = block_mode_count; i < packed_mode_count_simd_up; i++)
10911091
{
10921092
errors_of_best_combination[i] = ERROR_CALC_DEFAULT;
10931093
best_quant_levels[i] = QUANT_2;
@@ -1101,7 +1101,7 @@ unsigned int compute_ideal_endpoint_formats(
11011101
// The block contains 1 partition
11021102
if (partition_count == 1)
11031103
{
1104-
for (unsigned int i = 0; i < bsd.block_mode_count; ++i)
1104+
for (unsigned int i = 0; i < block_mode_count; ++i)
11051105
{
11061106
if (qwt_errors[i] >= ERROR_CALC_DEFAULT)
11071107
{
@@ -1133,7 +1133,7 @@ unsigned int compute_ideal_endpoint_formats(
11331133
two_partitions_find_best_combination_for_every_quantization_and_integer_count(
11341134
best_error, format_of_choice, combined_best_error, formats_of_choice);
11351135

1136-
for (unsigned int i = 0; i < bsd.block_mode_count; ++i)
1136+
for (unsigned int i = 0; i < block_mode_count; ++i)
11371137
{
11381138
if (qwt_errors[i] >= ERROR_CALC_DEFAULT)
11391139
{
@@ -1165,7 +1165,7 @@ unsigned int compute_ideal_endpoint_formats(
11651165
three_partitions_find_best_combination_for_every_quantization_and_integer_count(
11661166
best_error, format_of_choice, combined_best_error, formats_of_choice);
11671167

1168-
for (unsigned int i = 0; i < bsd.block_mode_count; ++i)
1168+
for (unsigned int i = 0; i < block_mode_count; ++i)
11691169
{
11701170
if (qwt_errors[i] >= ERROR_CALC_DEFAULT)
11711171
{
@@ -1198,7 +1198,7 @@ unsigned int compute_ideal_endpoint_formats(
11981198
four_partitions_find_best_combination_for_every_quantization_and_integer_count(
11991199
best_error, format_of_choice, combined_best_error, formats_of_choice);
12001200

1201-
for (unsigned int i = 0; i < bsd.block_mode_count; ++i)
1201+
for (unsigned int i = 0; i < block_mode_count; ++i)
12021202
{
12031203
if (qwt_errors[i] >= ERROR_CALC_DEFAULT)
12041204
{
@@ -1237,7 +1237,7 @@ unsigned int compute_ideal_endpoint_formats(
12371237
vint vbest_error_index(-1);
12381238
vfloat vbest_ep_error(ERROR_CALC_DEFAULT);
12391239
vint lane_ids = vint::lane_id();
1240-
for (unsigned int j = 0; j < bsd.block_mode_count; j += ASTCENC_SIMD_WIDTH)
1240+
for (unsigned int j = 0; j < block_mode_count; j += ASTCENC_SIMD_WIDTH)
12411241
{
12421242
vfloat err = vfloat(&errors_of_best_combination[j]);
12431243
vmask mask1 = err < vbest_ep_error;

0 commit comments

Comments
 (0)