Skip to content

Commit 2042cfc

Browse files
committed
Stop testing search mode0 for mid bit-rate thorough trials
1 parent 54fff63 commit 2042cfc

5 files changed

Lines changed: 46 additions & 27 deletions

File tree

Docs/ChangeLog-4x.md

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,20 @@ clocked at 4.2 GHz, running `astcenc` using AVX2 and 6 threads.
1111

1212
**Status:** In development
1313

14-
The 4.6.0 release is a small release increment (so far).
14+
The 4.6.0 release retunes the compressor heuristics to give improvements to
15+
performance at the cost of trivial losses in image quality.
16+
17+
Reminder - the codec library API is not designed to be binary compatible across
18+
versions. We always recommend rebuilding your client-side code using the updated
19+
`astcenc.h` header.
1520

1621
* **General:**
1722
* **Optimization:** `-medium` search quality no longer tests 4 partition
18-
encodings for block sizes between 25 and 83 texels. This improves
19-
performance for a tiny drop in image quality.
23+
encodings for block sizes between 25 and 83 texels (inclusive). This
24+
improves performance for a tiny drop in image quality.
25+
* **Optimization:** `-thorough` and higher search qualities no longer test the
26+
mode0 first search for block sizes between 25 and 83 texels (inclusive).
27+
This improves performance for a tiny drop in image quality.
2028
* **Optimization:** `TUNE_MAX_PARTITIONING_CANDIDATES` reduced from 32 to 8
2129
to reduce the size of stack allocated data structures. This causes a tiny
2230
drop in image quality for the `-verythorough` and `-exhaustive` presets.

Source/astcenc.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,14 @@ struct astcenc_config
542542
*/
543543
float tune_2plane_early_out_limit_correlation;
544544

545+
/**
546+
* @brief The config enable for the mode0 fast-path search.
547+
*
548+
* If this is set to TUNE_MIN_SEARCH_MODE0 or higher then the early-out fast mode0
549+
* search is enabled. This option is ineffective for 3D block sizes.
550+
*/
551+
float tune_search_mode0_enable;
552+
545553
#if defined(ASTCENC_DIAGNOSTICS)
546554
/**
547555
* @brief The path to save the diagnostic trace data to.

Source/astcenc_compress_symbolic.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1282,9 +1282,10 @@ void compress_block(
12821282

12831283
static const float errorval_overshoot = 1.0f / ctx.config.tune_mse_overshoot;
12841284

1285-
// Only enable MODE0 fast path (trial 0) if 2D, and more than 25 texels
1285+
// Only enable MODE0 fast path (trial 0) if the tuning config enables it
1286+
// Never enable for 3D blocks as no "always" block modes are available
12861287
int start_trial = 1;
1287-
if ((bsd.texel_count >= TUNE_MIN_TEXELS_MODE0_FASTPATH) && (bsd.zdim == 1))
1288+
if ((ctx.config.tune_search_mode0_enable >= TUNE_MIN_SEARCH_MODE0) && (bsd.zdim == 1))
12881289
{
12891290
start_trial = 0;
12901291
}

Source/astcenc_entry.cpp

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ struct astcenc_preset_config
5555
float tune_2partition_early_out_limit_factor;
5656
float tune_3partition_early_out_limit_factor;
5757
float tune_2plane_early_out_limit_correlation;
58+
float tune_search_mode0_enable;
5859
};
5960

6061
/**
@@ -63,22 +64,22 @@ struct astcenc_preset_config
6364
static const std::array<astcenc_preset_config, 6> preset_configs_high {{
6465
{
6566
ASTCENC_PRE_FASTEST,
66-
2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f
67+
2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 0.0f
6768
}, {
6869
ASTCENC_PRE_FAST,
69-
3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f
70+
3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f, 0.0f
7071
}, {
7172
ASTCENC_PRE_MEDIUM,
72-
4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f
73+
4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f, 0.0f
7374
}, {
7475
ASTCENC_PRE_THOROUGH,
75-
4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f
76+
4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f, 0.0f
7677
}, {
7778
ASTCENC_PRE_VERYTHOROUGH,
78-
4, 256, 128, 64, 98, 4, 6, 8, 6, 4, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f
79+
4, 256, 128, 64, 98, 4, 6, 8, 6, 4, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f
7980
}, {
8081
ASTCENC_PRE_EXHAUSTIVE,
81-
4, 512, 512, 512, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f
82+
4, 512, 512, 512, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f
8283
}
8384
}};
8485

@@ -88,22 +89,22 @@ static const std::array<astcenc_preset_config, 6> preset_configs_high {{
8889
static const std::array<astcenc_preset_config, 6> preset_configs_mid {{
8990
{
9091
ASTCENC_PRE_FASTEST,
91-
2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f
92+
2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f
9293
}, {
9394
ASTCENC_PRE_FAST,
94-
3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f
95+
3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f
9596
}, {
9697
ASTCENC_PRE_MEDIUM,
97-
3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f
98+
3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f, 1.0f
9899
}, {
99100
ASTCENC_PRE_THOROUGH,
100-
4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f
101+
4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f, 0.0f
101102
}, {
102103
ASTCENC_PRE_VERYTHOROUGH,
103-
4, 256, 128, 64, 98, 4, 6, 8, 6, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f
104+
4, 256, 128, 64, 98, 4, 6, 8, 6, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f
104105
}, {
105106
ASTCENC_PRE_EXHAUSTIVE,
106-
4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f
107+
4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f
107108
}
108109
}};
109110

@@ -113,22 +114,22 @@ static const std::array<astcenc_preset_config, 6> preset_configs_mid {{
113114
static const std::array<astcenc_preset_config, 6> preset_configs_low {{
114115
{
115116
ASTCENC_PRE_FASTEST,
116-
2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f
117+
2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f
117118
}, {
118119
ASTCENC_PRE_FAST,
119-
2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f
120+
2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f
120121
}, {
121122
ASTCENC_PRE_MEDIUM,
122-
3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f
123+
3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f, 1.0f
123124
}, {
124125
ASTCENC_PRE_THOROUGH,
125-
4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f
126+
4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f, 1.0f
126127
}, {
127128
ASTCENC_PRE_VERYTHOROUGH,
128-
4, 256, 128, 64, 98, 4, 6, 8, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f
129+
4, 256, 128, 64, 98, 4, 6, 8, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 1.0f
129130
}, {
130131
ASTCENC_PRE_EXHAUSTIVE,
131-
4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f
132+
4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 1.0f
132133
}
133134
}};
134135

@@ -516,6 +517,7 @@ astcenc_error astcenc_config_init(
516517
config.tune_2partition_early_out_limit_factor = (*preset_configs)[start].tune_2partition_early_out_limit_factor;
517518
config.tune_3partition_early_out_limit_factor = (*preset_configs)[start].tune_3partition_early_out_limit_factor;
518519
config.tune_2plane_early_out_limit_correlation = (*preset_configs)[start].tune_2plane_early_out_limit_correlation;
520+
config.tune_search_mode0_enable = (*preset_configs)[start].tune_search_mode0_enable;
519521
}
520522
// Start and end node are not the same - so interpolate between them
521523
else
@@ -554,6 +556,7 @@ astcenc_error astcenc_config_init(
554556
config.tune_2partition_early_out_limit_factor = LERP(tune_2partition_early_out_limit_factor);
555557
config.tune_3partition_early_out_limit_factor = LERP(tune_3partition_early_out_limit_factor);
556558
config.tune_2plane_early_out_limit_correlation = LERP(tune_2plane_early_out_limit_correlation);
559+
config.tune_search_mode0_enable = LERP(tune_search_mode0_enable);
557560
#undef LERP
558561
#undef LERPI
559562
#undef LERPUI
@@ -581,6 +584,7 @@ astcenc_error astcenc_config_init(
581584
case ASTCENC_PRF_HDR_RGB_LDR_A:
582585
case ASTCENC_PRF_HDR:
583586
config.tune_db_limit = 999.0f;
587+
config.tune_search_mode0_enable = 0.0f;
584588
break;
585589
default:
586590
return ASTCENC_ERR_BAD_PROFILE;

Source/astcenc_internal.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,11 +119,9 @@ static constexpr unsigned int WEIGHTS_MAX_DECIMATION_MODES { 87 };
119119
static constexpr float ERROR_CALC_DEFAULT { 1e30f };
120120

121121
/**
122-
* @brief The minimum texel count for a block to use the one partition fast path.
123-
*
124-
* This setting skips 4x4 and 5x4 block sizes.
122+
* @brief The minimum tune_search_mode0_enable value that enables the one partition fast path.
125123
*/
126-
static constexpr unsigned int TUNE_MIN_TEXELS_MODE0_FASTPATH { 24 };
124+
static constexpr float TUNE_MIN_SEARCH_MODE0 { 0.85f };
127125

128126
/**
129127
* @brief The maximum number of candidate encodings tested for each encoding mode.

0 commit comments

Comments
 (0)