Skip to content

Commit 4363496

Browse files
authored
Weight quant bound forwarding (#333)
Currently all trials are independent, repeating the full active block mode and decimation mode search for every plane count and partition count that is tried. In reality, the first full search of 1 plane 1 partition sets a usable baseline for what plausible encodings will look like - adding more planes and more partitions makes it very unlikely (although not impossible) that later trials will actually use a higher weight quantization level, as they just don't have the bitrate available to do it effectively. This PR adds quant forwarding to the trials. The "1 plane 1 partition" search will do a full search of all active modes, and then the weight quant of the most successful block mode in that search will be used as an upper bound on the later searches that target more planes and partitions. This dramatically prunes the search space, significantly helping -medium and -thorough.
1 parent 47991de commit 4363496

9 files changed

Lines changed: 559 additions & 468 deletions

Source/astcenc_block_sizes.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -802,8 +802,8 @@ static void construct_dt_entry_2d(
802802
assert(maxprec_1plane >= 0 || maxprec_2planes >= 0);
803803
bsd.decimation_modes[index].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
804804
bsd.decimation_modes[index].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
805-
bsd.decimation_modes[index].ref_1_plane = 0;
806-
bsd.decimation_modes[index].ref_2_planes = 0;
805+
bsd.decimation_modes[index].refprec_1_plane = 0;
806+
bsd.decimation_modes[index].refprec_2_planes = 0;
807807
}
808808

809809
/**
@@ -953,11 +953,11 @@ static void construct_block_size_descriptor_2d(
953953

954954
if (is_dual_plane)
955955
{
956-
dm.ref_2_planes = 1;
956+
dm.set_ref_2_plane(bm.get_weight_quant_mode());
957957
}
958958
else
959959
{
960-
dm.ref_1_plane = 1;
960+
dm.set_ref_1_plane(bm.get_weight_quant_mode());
961961
}
962962

963963
bm.decimation_mode = static_cast<uint8_t>(decimation_mode);
@@ -994,8 +994,8 @@ static void construct_block_size_descriptor_2d(
994994
{
995995
bsd.decimation_modes[i].maxprec_1plane = -1;
996996
bsd.decimation_modes[i].maxprec_2planes = -1;
997-
bsd.decimation_modes[i].ref_1_plane = 0;
998-
bsd.decimation_modes[i].ref_2_planes = 0;
997+
bsd.decimation_modes[i].refprec_1_plane = 0;
998+
bsd.decimation_modes[i].refprec_2_planes = 0;
999999
}
10001000

10011001
// Determine the texels to use for kmeans clustering.
@@ -1080,8 +1080,8 @@ static void construct_block_size_descriptor_3d(
10801080

10811081
bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
10821082
bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
1083-
bsd.decimation_modes[decimation_mode_count].ref_1_plane = maxprec_1plane == -1 ? 0 : 1;
1084-
bsd.decimation_modes[decimation_mode_count].ref_2_planes = maxprec_2planes == -1 ? 0 : 1;
1083+
bsd.decimation_modes[decimation_mode_count].refprec_1_plane = maxprec_1plane == -1 ? 0 : 0xFFFF;
1084+
bsd.decimation_modes[decimation_mode_count].refprec_2_planes = maxprec_2planes == -1 ? 0 : 0xFFFF;
10851085
decimation_mode_count++;
10861086
}
10871087
}
@@ -1092,8 +1092,8 @@ static void construct_block_size_descriptor_3d(
10921092
{
10931093
bsd.decimation_modes[i].maxprec_1plane = -1;
10941094
bsd.decimation_modes[i].maxprec_2planes = -1;
1095-
bsd.decimation_modes[i].ref_1_plane = 0;
1096-
bsd.decimation_modes[i].ref_2_planes = 0;
1095+
bsd.decimation_modes[i].refprec_1_plane = 0;
1096+
bsd.decimation_modes[i].refprec_2_planes = 0;
10971097
}
10981098

10991099
bsd.decimation_mode_count_always = 0; // Skipped for 3D modes

Source/astcenc_compress_symbolic.cpp

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -371,12 +371,15 @@ static float compress_symbolic_block_for_partition_1plane(
371371
unsigned int partition_count,
372372
unsigned int partition_index,
373373
symbolic_compressed_block& scb,
374-
compression_working_buffers& tmpbuf
374+
compression_working_buffers& tmpbuf,
375+
int quant_limit
375376
) {
376377
promise(partition_count > 0);
377378
promise(config.tune_candidate_limit > 0);
378379
promise(config.tune_refinement_limit > 0);
379380

381+
int max_weight_quant = astc::min(static_cast<int>(QUANT_32), quant_limit);
382+
380383
auto compute_difference = &compute_symbolic_block_difference_1plane;
381384
if ((partition_count == 1) && !(config.flags & ASTCENC_FLG_MAP_RGBM))
382385
{
@@ -400,7 +403,7 @@ static float compress_symbolic_block_for_partition_1plane(
400403
for (unsigned int i = 0; i < max_decimation_modes; i++)
401404
{
402405
const auto& dm = bsd.get_decimation_mode(i);
403-
if (!dm.ref_1_plane)
406+
if (!dm.is_ref_1_plane(static_cast<quant_method>(max_weight_quant)))
404407
{
405408
continue;
406409
}
@@ -431,6 +434,7 @@ static float compress_symbolic_block_for_partition_1plane(
431434
config.tune_low_weight_count_limit,
432435
only_always, bsd,
433436
dec_weights_ideal,
437+
max_weight_quant,
434438
tmpbuf);
435439

436440
float* weight_low_value = tmpbuf.weight_low_value1;
@@ -454,6 +458,13 @@ static float compress_symbolic_block_for_partition_1plane(
454458
for (unsigned int i = 0; i < max_block_modes; ++i)
455459
{
456460
const block_mode& bm = bsd.block_modes[i];
461+
462+
if (bm.quant_mode > max_weight_quant)
463+
{
464+
qwt_errors[i] = 1e38f;
465+
continue;
466+
}
467+
457468
assert(!bm.is_dual_plane);
458469
int bitcount = free_bits_for_partition_count[partition_count - 1] - bm.weight_bits;
459470
if (bitcount <= 0)
@@ -721,12 +732,15 @@ static float compress_symbolic_block_for_partition_2planes(
721732
float tune_errorval_threshold,
722733
unsigned int plane2_component,
723734
symbolic_compressed_block& scb,
724-
compression_working_buffers& tmpbuf
735+
compression_working_buffers& tmpbuf,
736+
int quant_limit
725737
) {
726738
promise(config.tune_candidate_limit > 0);
727739
promise(config.tune_refinement_limit > 0);
728740
promise(bsd.decimation_mode_count_selected > 0);
729741

742+
int max_weight_quant = astc::min(static_cast<int>(QUANT_32), quant_limit);
743+
730744
// Compute ideal weights and endpoint colors, with no quantization or decimation
731745
endpoints_and_weights& ei1 = tmpbuf.ei1;
732746
endpoints_and_weights& ei2 = tmpbuf.ei2;
@@ -741,7 +755,7 @@ static float compress_symbolic_block_for_partition_2planes(
741755
for (unsigned int i = 0; i < bsd.decimation_mode_count_selected; i++)
742756
{
743757
const auto& dm = bsd.get_decimation_mode(i);
744-
if (!dm.ref_2_planes)
758+
if (!dm.is_ref_2_plane(static_cast<quant_method>(max_weight_quant)))
745759
{
746760
continue;
747761
}
@@ -785,7 +799,7 @@ static float compress_symbolic_block_for_partition_2planes(
785799

786800
compute_angular_endpoints_2planes(
787801
config.tune_low_weight_count_limit,
788-
bsd, dec_weights_ideal,
802+
bsd, dec_weights_ideal, max_weight_quant,
789803
tmpbuf);
790804

791805
// For each mode (which specifies a decimation and a quantization):
@@ -809,6 +823,12 @@ static float compress_symbolic_block_for_partition_2planes(
809823
const block_mode& bm = bsd.block_modes[i];
810824
assert(bm.is_dual_plane);
811825

826+
if (bm.quant_mode > max_weight_quant)
827+
{
828+
qwt_errors[i] = 1e38f;
829+
continue;
830+
}
831+
812832
qwt_bitcounts[i] = 109 - bm.weight_bits;
813833

814834
if (weight_high_value1[i] > 1.02f * min_wt_cutoff1)
@@ -1270,6 +1290,7 @@ void compress_block(
12701290
start_trial = 0;
12711291
}
12721292

1293+
int quant_limit = QUANT_32;
12731294
for (int i = start_trial; i < 2; i++)
12741295
{
12751296
TRACE_NODE(node1, "pass");
@@ -1280,7 +1301,11 @@ void compress_block(
12801301
float errorval = compress_symbolic_block_for_partition_1plane(
12811302
ctx.config, bsd, blk, i == 0,
12821303
error_threshold * errorval_mult[i] * errorval_overshoot,
1283-
1, 0, scb, tmpbuf);
1304+
1, 0, scb, tmpbuf, QUANT_32);
1305+
1306+
// Record the quant level so we can use the filter later searches
1307+
const auto& bm = bsd.get_block_mode(scb.block_mode);
1308+
quant_limit = bm.get_weight_quant_mode();
12841309

12851310
best_errorvals_for_pcount[0] = astc::min(best_errorvals_for_pcount[0], errorval);
12861311
if (errorval < (error_threshold * errorval_mult[i]))
@@ -1325,7 +1350,7 @@ void compress_block(
13251350

13261351
float errorval = compress_symbolic_block_for_partition_2planes(
13271352
ctx.config, bsd, blk, error_threshold * errorval_overshoot,
1328-
i, scb, tmpbuf);
1353+
i, scb, tmpbuf, quant_limit);
13291354

13301355
// If attempting two planes is much worse than the best one plane result
13311356
// then further two plane searches are unlikely to help so move on ...
@@ -1362,7 +1387,7 @@ void compress_block(
13621387
ctx.config, bsd, blk, false,
13631388
error_threshold * errorval_overshoot,
13641389
partition_count, partition_indices[i],
1365-
scb, tmpbuf);
1390+
scb, tmpbuf, quant_limit);
13661391

13671392
best_errorvals_for_pcount[partition_count - 1] = astc::min(best_errorvals_for_pcount[partition_count - 1], errorval);
13681393
if (errorval < error_threshold)

Source/astcenc_entry.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,13 @@ struct astcenc_preset_config
6262
static const std::array<astcenc_preset_config, 5> preset_configs_high {{
6363
{
6464
ASTCENC_PRE_FASTEST,
65-
2, 10, 42, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.5f, 25
65+
2, 14, 44, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.5f, 25
6666
}, {
6767
ASTCENC_PRE_FAST,
6868
3, 14, 55, 3, 3, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.1f, 0.65f, 20
6969
}, {
7070
ASTCENC_PRE_MEDIUM,
71-
4, 28, 76, 3, 3 , 95.0f, 70.0f, 2.5f, 2.5f, 1.2f, 1.25f, 0.85f, 16
71+
4, 28, 76, 3, 3, 95.0f, 70.0f, 2.5f, 2.5f, 1.2f, 1.25f, 0.85f, 16
7272
}, {
7373
ASTCENC_PRE_THOROUGH,
7474
4, 76, 93, 4, 4, 105.0f, 77.0f, 10.0f, 10.0f, 2.5f, 1.25f, 0.95f, 12
@@ -85,13 +85,13 @@ static const std::array<astcenc_preset_config, 5> preset_configs_high {{
8585
static const std::array<astcenc_preset_config, 5> preset_configs_mid {{
8686
{
8787
ASTCENC_PRE_FASTEST,
88-
2, 10, 40, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.5f, 20
88+
2, 14, 43, 2, 2, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.0f, 0.5f, 20
8989
}, {
9090
ASTCENC_PRE_FAST,
91-
3, 14, 55, 3, 3, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.1f, 0.5f, 16
91+
3, 15, 55, 3, 3, 85.2f, 63.2f, 3.5f, 3.5f, 1.0f, 1.1f, 0.5f, 16
9292
}, {
9393
ASTCENC_PRE_MEDIUM,
94-
4, 28, 76, 3, 3, 95.0f, 70.0f, 3.0f, 3.0f, 1.2f, 1.25f, 0.75f, 14
94+
4, 30, 76, 3, 3, 95.0f, 70.0f, 3.0f, 3.0f, 1.2f, 1.25f, 0.75f, 14
9595
}, {
9696
ASTCENC_PRE_THOROUGH,
9797
4, 76, 93, 4, 4, 105.0f, 77.0f, 10.0f, 10.0f, 2.5f, 1.25f, 0.95f, 10
@@ -109,13 +109,13 @@ static const std::array<astcenc_preset_config, 5> preset_configs_mid {{
109109
static const std::array<astcenc_preset_config, 5> preset_configs_low {{
110110
{
111111
ASTCENC_PRE_FASTEST,
112-
2, 6, 38, 2, 2, 85.0f, 63.0f, 3.5f, 3.5f, 1.0f, 1.0f, 0.5f, 20
112+
2, 14, 42, 2, 2, 85.0f, 63.0f, 3.5f, 3.5f, 1.0f, 1.0f, 0.5f, 20
113113
}, {
114114
ASTCENC_PRE_FAST,
115-
3, 10, 53, 3, 3, 85.0f, 63.0f, 3.5f, 3.5f, 1.0f, 1.1f, 0.5f, 16
115+
2, 15, 55, 3, 3, 85.0f, 63.0f, 3.5f, 3.5f, 1.0f, 1.1f, 0.5f, 16
116116
}, {
117117
ASTCENC_PRE_MEDIUM,
118-
3, 26, 76, 3, 3, 95.0f, 70.0f, 3.5f, 3.5f, 1.2f, 1.25f, 0.65f, 12
118+
3, 30, 76, 3, 3, 95.0f, 70.0f, 3.5f, 3.5f, 1.2f, 1.25f, 0.65f, 12
119119
}, {
120120
ASTCENC_PRE_THOROUGH,
121121
4, 75, 92, 4, 4, 105.0f, 77.0f, 10.0f, 10.0f, 2.5f, 1.25f, 0.85f, 10

Source/astcenc_internal.h

Lines changed: 58 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -649,11 +649,61 @@ struct decimation_mode
649649
/** @brief The max weight precision for 2 planes, or -1 if not supported. */
650650
int8_t maxprec_2planes;
651651

652-
/** @brief Was this actually referenced by an active 1 plane mode? */
653-
uint8_t ref_1_plane;
652+
/**
653+
* @brief Bitvector indicating weight quant modes used by active 1 plane block modes.
654+
*
655+
* Bit 0 = QUANT_2, Bit 1 = QUANT_3, etc.
656+
*/
657+
uint16_t refprec_1_plane;
658+
659+
/**
660+
* @brief Bitvector indicating weight quant methods used by active 2 plane block modes.
661+
*
662+
* Bit 0 = QUANT_2, Bit 1 = QUANT_3, etc.
663+
*/
664+
uint16_t refprec_2_planes;
665+
666+
/**
667+
* @brief Set a 1 plane weight quant as active.
668+
*
669+
* @param weight_quant The quant method to set.
670+
*/
671+
void set_ref_1_plane(quant_method weight_quant)
672+
{
673+
refprec_1_plane |= (1 << weight_quant);
674+
}
675+
676+
/**
677+
* @brief Test if this mode is active below a given 1 plane weight quant (inclusive).
678+
*
679+
* @param max_weight_quant The max quant method to test.
680+
*/
681+
bool is_ref_1_plane(quant_method max_weight_quant) const
682+
{
683+
uint16_t mask = (1 << (max_weight_quant + 1)) - 1;
684+
return (refprec_1_plane & mask) != 0;
685+
}
654686

655-
/** @brief Was this actually referenced by an active 2 plane mode? */
656-
uint8_t ref_2_planes;
687+
/**
688+
* @brief Set a 2 plane weight quant as active.
689+
*
690+
* @param weight_quant The quant method to set.
691+
*/
692+
void set_ref_2_plane(quant_method weight_quant)
693+
{
694+
refprec_2_planes |= (1 << weight_quant);
695+
}
696+
697+
/**
698+
* @brief Test if this mode is active below a given 2 plane weight quant (inclusive).
699+
*
700+
* @param max_weight_quant The max quant method to test.
701+
*/
702+
bool is_ref_2_plane(quant_method max_weight_quant) const
703+
{
704+
uint16_t mask = (1 << (max_weight_quant + 1)) - 1;
705+
return (refprec_2_planes & mask) != 0;
706+
}
657707
};
658708

659709
/**
@@ -2246,13 +2296,15 @@ void prepare_angular_tables();
22462296
* @param only_always Only consider block modes that are always enabled.
22472297
* @param bsd The block size descriptor for the current trial.
22482298
* @param dec_weight_ideal_value The ideal decimated unquantized weight values.
2299+
* @param max_weight_quant The maximum block mode weight quantization allowed.
22492300
* @param[out] tmpbuf Preallocated scratch buffers for the compressor.
22502301
*/
22512302
void compute_angular_endpoints_1plane(
22522303
unsigned int tune_low_weight_limit,
22532304
bool only_always,
22542305
const block_size_descriptor& bsd,
22552306
const float* dec_weight_ideal_value,
2307+
unsigned int max_weight_quant,
22562308
compression_working_buffers& tmpbuf);
22572309

22582310
/**
@@ -2261,12 +2313,14 @@ void compute_angular_endpoints_1plane(
22612313
* @param tune_low_weight_limit Weight count cutoff below which we use simpler searches.
22622314
* @param bsd The block size descriptor for the current trial.
22632315
* @param dec_weight_ideal_value The ideal decimated unquantized weight values.
2316+
* @param max_weight_quant The maximum block mode weight quantization allowed.
22642317
* @param[out] tmpbuf Preallocated scratch buffers for the compressor.
22652318
*/
22662319
void compute_angular_endpoints_2planes(
22672320
unsigned int tune_low_weight_limit,
22682321
const block_size_descriptor& bsd,
22692322
const float* dec_weight_ideal_value,
2323+
unsigned int max_weight_quant,
22702324
compression_working_buffers& tmpbuf);
22712325

22722326
/* ============================================================================

Source/astcenc_weight_align.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,7 @@ void compute_angular_endpoints_1plane(
488488
bool only_always,
489489
const block_size_descriptor& bsd,
490490
const float* dec_weight_ideal_value,
491+
unsigned int max_weight_quant,
491492
compression_working_buffers& tmpbuf
492493
) {
493494
float (&low_value)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_low_value1;
@@ -502,7 +503,7 @@ void compute_angular_endpoints_1plane(
502503
for (unsigned int i = 0; i < max_decimation_modes; i++)
503504
{
504505
const decimation_mode& dm = bsd.decimation_modes[i];
505-
if (!dm.ref_1_plane)
506+
if (!dm.is_ref_1_plane(static_cast<quant_method>(max_weight_quant)))
506507
{
507508
continue;
508509
}
@@ -515,6 +516,11 @@ void compute_angular_endpoints_1plane(
515516
max_precision = TUNE_MAX_ANGULAR_QUANT;
516517
}
517518

519+
if (max_precision > max_weight_quant)
520+
{
521+
max_precision = max_weight_quant;
522+
}
523+
518524
if (weight_count < tune_low_weight_limit)
519525
{
520526
compute_angular_endpoints_for_quant_levels_lwc(
@@ -560,6 +566,7 @@ void compute_angular_endpoints_2planes(
560566
unsigned int tune_low_weight_limit,
561567
const block_size_descriptor& bsd,
562568
const float* dec_weight_ideal_value,
569+
unsigned int max_weight_quant,
563570
compression_working_buffers& tmpbuf
564571
) {
565572
float (&low_value1)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_low_value1;
@@ -576,7 +583,7 @@ void compute_angular_endpoints_2planes(
576583
for (unsigned int i = 0; i < bsd.decimation_mode_count_selected; i++)
577584
{
578585
const decimation_mode& dm = bsd.decimation_modes[i];
579-
if (!dm.ref_2_planes)
586+
if (!dm.is_ref_2_plane(static_cast<quant_method>(max_weight_quant)))
580587
{
581588
continue;
582589
}
@@ -589,6 +596,11 @@ void compute_angular_endpoints_2planes(
589596
max_precision = TUNE_MAX_ANGULAR_QUANT;
590597
}
591598

599+
if (max_precision > max_weight_quant)
600+
{
601+
max_precision = max_weight_quant;
602+
}
603+
592604
if (weight_count < tune_low_weight_limit)
593605
{
594606
compute_angular_endpoints_for_quant_levels_lwc(

0 commit comments

Comments
 (0)